summaryrefslogtreecommitdiff
path: root/extractor/phrase_location.h
diff options
context:
space:
mode:
Diffstat (limited to 'extractor/phrase_location.h')
-rw-r--r--extractor/phrase_location.h12
1 files changed, 12 insertions, 0 deletions
diff --git a/extractor/phrase_location.h b/extractor/phrase_location.h
index e5f3cf08..91950e03 100644
--- a/extractor/phrase_location.h
+++ b/extractor/phrase_location.h
@@ -8,13 +8,25 @@ using namespace std;
namespace extractor {
+/**
+ * Structure containing information about the occurrences of a phrase in the
+ * source data.
+ *
+ * Every consecutive (disjoint) group of num_subpatterns entries in matchings
+ * vector encodes an occurrence of the phrase. The i-th entry of a group
+ * represents the start of the i-th subpattern of the phrase. If the phrase
+ * doesn't contain any nonterminals, then it may also be represented as the
+ * range in the suffix array which matches the phrase.
+ */
struct PhraseLocation {
PhraseLocation(int sa_low = -1, int sa_high = -1);
PhraseLocation(const vector<int>& matchings, int num_subpatterns);
+ // Checks if a phrase has any occurrences in the source data.
bool IsEmpty() const;
+ // Returns the number of occurrences of a phrase in the source data.
int GetSize() const;
friend bool operator==(const PhraseLocation& a, const PhraseLocation& b);