diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
commit | c164dc0ed8a32e4095ba1b36495e0f743b8cc1ea (patch) | |
tree | 78b81e4c63adfa67adb7b8f80c3e6be87b4a2b2a /extractor/phrase_location.h | |
parent | 0e46089cafa4e8e2f060e370d7afaceeda6b90a9 (diff) | |
parent | d467e14b28085809c31431be0478eb3d9322fe96 (diff) |
merge paul's extractor code
Diffstat (limited to 'extractor/phrase_location.h')
-rw-r--r-- | extractor/phrase_location.h | 41 |
1 files changed, 41 insertions, 0 deletions
// File: extractor/phrase_location.h (added as a new file by this commit).
#ifndef EXTRACTOR_PHRASE_LOCATION_H_
#define EXTRACTOR_PHRASE_LOCATION_H_

#include <memory>
#include <vector>

// NOTE(review): a using-directive at header scope leaks into every file that
// includes this header. It is kept only because other translation units may
// already rely on it for unqualified std names; the declarations below are
// fully qualified so this header itself no longer depends on it.
using namespace std;

namespace extractor {

/**
 * Structure containing information about the occurrences of a phrase in the
 * source data.
 *
 * Every consecutive (disjoint) group of num_subpatterns entries in the
 * matchings vector encodes one occurrence of the phrase. The i-th entry of a
 * group represents the start of the i-th subpattern of the phrase. If the
 * phrase doesn't contain any nonterminals, then it may also be represented as
 * the range in the suffix array which matches the phrase.
 */
struct PhraseLocation {
  // Builds a location from a suffix array range. Both bounds default to -1.
  // NOTE(review): the definition is not in this header; whether sa_high is an
  // inclusive or exclusive bound, and what the -1 defaults signify, should be
  // confirmed against the implementation file.
  PhraseLocation(int sa_low = -1, int sa_high = -1);

  // Builds a location from a flat list of matchings, laid out in groups of
  // num_subpatterns entries as described in the struct comment.
  PhraseLocation(const std::vector<int>& matchings, int num_subpatterns);

  // Checks if a phrase has any occurrences in the source data.
  bool IsEmpty() const;

  // Returns the number of occurrences of a phrase in the source data.
  int GetSize() const;

  // Equality comparison between two phrase locations. Declared as a friend,
  // so it is reachable through argument-dependent lookup.
  friend bool operator==(const PhraseLocation& a, const PhraseLocation& b);

  // Bounds of the suffix array range matching the phrase.
  int sa_low;
  int sa_high;
  // Start positions of the subpatterns, num_subpatterns entries per occurrence.
  std::shared_ptr<std::vector<int>> matchings;
  // Number of entries in matchings that make up a single occurrence.
  int num_subpatterns;
};

}  // namespace extractor

#endif  // EXTRACTOR_PHRASE_LOCATION_H_