diff options
author | Patrick Simianer <p@simianer.de> | 2013-05-02 09:09:59 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-05-02 09:09:59 +0200 |
commit | 9e50f0237413180fba11b500c9dce5c600e3c157 (patch) | |
tree | 556fc31d231353c853a864afffddd43dc525549a /extractor/phrase_location.h | |
parent | d18024a41cbc1b54db88d499571349a6234b6db8 (diff) | |
parent | 14ed53426726202813a8e82d706b44266f015fe1 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'extractor/phrase_location.h')
-rw-r--r-- | extractor/phrase_location.h | 41 |
1 files changed, 41 insertions, 0 deletions
// Provenance: introduced as new file extractor/phrase_location.h (mode 100644).
#ifndef EXTRACTOR_PHRASE_LOCATION_H_
#define EXTRACTOR_PHRASE_LOCATION_H_

#include <memory>
#include <vector>

// NOTE(review): a using-directive at header scope leaks into every file that
// includes this header. It is kept here only because other translation units
// may already rely on it; the declarations below no longer depend on it.
using namespace std;

namespace extractor {

/**
 * Structure containing information about the occurrences of a phrase in the
 * source data.
 *
 * Every consecutive (disjoint) group of num_subpatterns entries in the
 * matchings vector encodes one occurrence of the phrase. The i-th entry of a
 * group represents the start of the i-th subpattern of the phrase. If the
 * phrase doesn't contain any nonterminals, then it may also be represented as
 * the range in the suffix array which matches the phrase.
 */
struct PhraseLocation {
  // Builds a location from a suffix array range. Both bounds default to -1.
  // NOTE(review): -1 presumably means "no range set" -- confirm against the
  // implementation file.
  PhraseLocation(int sa_low = -1, int sa_high = -1);

  // Builds a location from a list of matchings, where every group of
  // num_subpatterns consecutive entries describes one occurrence.
  PhraseLocation(const std::vector<int>& matchings, int num_subpatterns);

  // Checks if a phrase has any occurrences in the source data.
  bool IsEmpty() const;

  // Returns the number of occurrences of a phrase in the source data.
  int GetSize() const;

  // Equality comparison; defined outside this header.
  friend bool operator==(const PhraseLocation& a, const PhraseLocation& b);

  // Bounds of the matching range in the suffix array.
  // NOTE(review): whether sa_high is inclusive or exclusive is not visible
  // from this header -- confirm against the implementation file.
  int sa_low, sa_high;
  // Flattened start positions: num_subpatterns entries per occurrence.
  std::shared_ptr<std::vector<int> > matchings;
  // Number of subpatterns in the phrase, i.e. the group size in matchings.
  int num_subpatterns;
};

}  // namespace extractor

#endif  // EXTRACTOR_PHRASE_LOCATION_H_