summaryrefslogtreecommitdiff
path: root/extractor/phrase.h
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2013-04-23 19:35:18 -0400
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2013-04-23 19:35:18 -0400
commit6d347f1ce078dede3da0e1498f75e357351c6543 (patch)
tree8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/phrase.h
parentd11b76def6899790161c47a73018146311356d8b (diff)
parent5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff)
merge paul's extractor code
Diffstat (limited to 'extractor/phrase.h')
-rw-r--r--extractor/phrase.h52
1 files changed, 52 insertions, 0 deletions
diff --git a/extractor/phrase.h b/extractor/phrase.h
new file mode 100644
index 00000000..a8e91e3c
--- /dev/null
+++ b/extractor/phrase.h
@@ -0,0 +1,52 @@
+#ifndef _PHRASE_H_
+#define _PHRASE_H_
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "phrase_builder.h"
+
+using namespace std;
+
+namespace extractor {
+
+/**
+ * Structure containing the data for a phrase.
+ */
+class Phrase {
+ public:
+ friend Phrase PhraseBuilder::Build(const vector<int>& phrase);
+
+ // Returns the number of nonterminals in the phrase.
+ int Arity() const;
+
+ // Returns the number of terminals (length) for the given chunk. (A chunk is a
+ // contiguous sequence of terminals in the phrase).
+ int GetChunkLen(int index) const;
+
+ // Returns the symbols (word ids) marking up the phrase.
+ vector<int> Get() const;
+
+ // Returns the symbol located at the given position in the phrase.
+ int GetSymbol(int position) const;
+
+ // Returns the number of symbols in the phrase.
+ int GetNumSymbols() const;
+
+ // Returns the words making up the phrase. (Nonterminals are stripped out.)
+ vector<string> GetWords() const;
+
+ bool operator<(const Phrase& other) const;
+
+ friend ostream& operator<<(ostream& os, const Phrase& phrase);
+
+ private:
+ vector<int> symbols;
+ vector<int> var_pos;
+ vector<string> words;
+};
+
+} // namespace extractor
+
+#endif