summaryrefslogtreecommitdiff
path: root/extractor/phrase_location.h
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2013-04-23 19:35:18 -0400
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2013-04-23 19:35:18 -0400
commitc164dc0ed8a32e4095ba1b36495e0f743b8cc1ea (patch)
tree78b81e4c63adfa67adb7b8f80c3e6be87b4a2b2a /extractor/phrase_location.h
parent0e46089cafa4e8e2f060e370d7afaceeda6b90a9 (diff)
parentd467e14b28085809c31431be0478eb3d9322fe96 (diff)
merge paul's extractor code
Diffstat (limited to 'extractor/phrase_location.h')
-rw-r--r--extractor/phrase_location.h41
1 files changed, 41 insertions, 0 deletions
diff --git a/extractor/phrase_location.h b/extractor/phrase_location.h
new file mode 100644
index 00000000..91950e03
--- /dev/null
+++ b/extractor/phrase_location.h
@@ -0,0 +1,41 @@
+#ifndef _PHRASE_LOCATION_H_
+#define _PHRASE_LOCATION_H_
+
+#include <memory>
+#include <vector>
+
+using namespace std;
+
+namespace extractor {
+
+/**
+ * Structure containing information about the occurrences of a phrase in the
+ * source data.
+ *
+ * Every consecutive (disjoint) group of num_subpatterns entries in matchings
+ * vector encodes an occurrence of the phrase. The i-th entry of a group
+ * represents the start of the i-th subpattern of the phrase. If the phrase
+ * doesn't contain any nonterminals, then it may also be represented as the
+ * range in the suffix array which matches the phrase.
+ */
+struct PhraseLocation {
+ PhraseLocation(int sa_low = -1, int sa_high = -1);
+
+ PhraseLocation(const vector<int>& matchings, int num_subpatterns);
+
+ // Checks if a phrase has any occurrences in the source data.
+ bool IsEmpty() const;
+
+ // Returns the number of occurrences of a phrase in the source data.
+ int GetSize() const;
+
+ friend bool operator==(const PhraseLocation& a, const PhraseLocation& b);
+
+ int sa_low, sa_high;
+ shared_ptr<vector<int> > matchings;
+ int num_subpatterns;
+};
+
+} // namespace extractor
+
+#endif