diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
commit | 6d347f1ce078dede3da0e1498f75e357351c6543 (patch) | |
tree | 8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/phrase_location.h | |
parent | d11b76def6899790161c47a73018146311356d8b (diff) | |
parent | 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff) |
merge paul's extractor code
Diffstat (limited to 'extractor/phrase_location.h')
-rw-r--r-- | extractor/phrase_location.h | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/extractor/phrase_location.h b/extractor/phrase_location.h new file mode 100644 index 00000000..91950e03 --- /dev/null +++ b/extractor/phrase_location.h @@ -0,0 +1,41 @@ +#ifndef _PHRASE_LOCATION_H_ +#define _PHRASE_LOCATION_H_ + +#include <memory> +#include <vector> + +using namespace std; + +namespace extractor { + +/** + * Structure containing information about the occurrences of a phrase in the + * source data. + * + * Every consecutive (disjoint) group of num_subpatterns entries in matchings + * vector encodes an occurrence of the phrase. The i-th entry of a group + * represents the start of the i-th subpattern of the phrase. If the phrase + * doesn't contain any nonterminals, then it may also be represented as the + * range in the suffix array which matches the phrase. + */ +struct PhraseLocation { + PhraseLocation(int sa_low = -1, int sa_high = -1); + + PhraseLocation(const vector<int>& matchings, int num_subpatterns); + + // Checks if a phrase has any occurrences in the source data. + bool IsEmpty() const; + + // Returns the number of occurrences of a phrase in the source data. + int GetSize() const; + + friend bool operator==(const PhraseLocation& a, const PhraseLocation& b); + + int sa_low, sa_high; + shared_ptr<vector<int> > matchings; + int num_subpatterns; +}; + +} // namespace extractor + +#endif |