diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
commit | 6d347f1ce078dede3da0e1498f75e357351c6543 (patch) | |
tree | 8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/phrase.h | |
parent | d11b76def6899790161c47a73018146311356d8b (diff) | |
parent | 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff) |
merge paul's extractor code
Diffstat (limited to 'extractor/phrase.h')
-rw-r--r-- | extractor/phrase.h | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/extractor/phrase.h b/extractor/phrase.h new file mode 100644 index 00000000..a8e91e3c --- /dev/null +++ b/extractor/phrase.h @@ -0,0 +1,52 @@ +#ifndef _PHRASE_H_ +#define _PHRASE_H_ + +#include <iostream> +#include <string> +#include <vector> + +#include "phrase_builder.h" + +using namespace std; + +namespace extractor { + +/** + * Structure containing the data for a phrase. + */ +class Phrase { + public: + friend Phrase PhraseBuilder::Build(const vector<int>& phrase); + + // Returns the number of nonterminals in the phrase. + int Arity() const; + + // Returns the number of terminals (length) for the given chunk. (A chunk is a + // contiguous sequence of terminals in the phrase). + int GetChunkLen(int index) const; + + // Returns the symbols (word ids) marking up the phrase. + vector<int> Get() const; + + // Returns the symbol located at the given position in the phrase. + int GetSymbol(int position) const; + + // Returns the number of symbols in the phrase. + int GetNumSymbols() const; + + // Returns the words making up the phrase. (Nonterminals are stripped out.) + vector<string> GetWords() const; + + bool operator<(const Phrase& other) const; + + friend ostream& operator<<(ostream& os, const Phrase& phrase); + + private: + vector<int> symbols; + vector<int> var_pos; + vector<string> words; +}; + +} // namespace extractor + +#endif |