diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-04-23 19:35:18 -0400 |
commit | 6d347f1ce078dede3da0e1498f75e357351c6543 (patch) | |
tree | 8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/target_phrase_extractor.h | |
parent | d11b76def6899790161c47a73018146311356d8b (diff) | |
parent | 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff) |
merge paul's extractor code
Diffstat (limited to 'extractor/target_phrase_extractor.h')
-rw-r--r-- | extractor/target_phrase_extractor.h | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/extractor/target_phrase_extractor.h b/extractor/target_phrase_extractor.h new file mode 100644 index 00000000..289bae2f --- /dev/null +++ b/extractor/target_phrase_extractor.h @@ -0,0 +1,64 @@ +#ifndef _TARGET_PHRASE_EXTRACTOR_H_ +#define _TARGET_PHRASE_EXTRACTOR_H_ + +#include <memory> +#include <unordered_map> +#include <vector> + +using namespace std; + +namespace extractor { + +typedef vector<pair<int, int> > PhraseAlignment; + +class Alignment; +class DataArray; +class Phrase; +class PhraseBuilder; +class RuleExtractorHelper; +class Vocabulary; + +class TargetPhraseExtractor { + public: + TargetPhraseExtractor(shared_ptr<DataArray> target_data_array, + shared_ptr<Alignment> alignment, + shared_ptr<PhraseBuilder> phrase_builder, + shared_ptr<RuleExtractorHelper> helper, + shared_ptr<Vocabulary> vocabulary, + int max_rule_span, + bool require_tight_phrases); + + virtual ~TargetPhraseExtractor(); + + // Finds all the target phrases that can extracted from a span in the + // target sentence (matching the given set of target phrase gaps). + virtual vector<pair<Phrase, PhraseAlignment> > ExtractPhrases( + const vector<pair<int, int> >& target_gaps, const vector<int>& target_low, + int target_phrase_low, int target_phrase_high, + const unordered_map<int, int>& source_indexes, int sentence_id) const; + + protected: + TargetPhraseExtractor(); + + private: + // Computes the cartesian product over the sets of possible target phrase + // chunks. + void GeneratePhrases( + vector<pair<Phrase, PhraseAlignment> >& target_phrases, + const vector<pair<int, int> >& ranges, int index, + vector<int>& subpatterns, const vector<int>& target_gap_order, + int target_phrase_low, int target_phrase_high, + const unordered_map<int, int>& source_indexes, int sentence_id) const; + + shared_ptr<DataArray> target_data_array; + shared_ptr<Alignment> alignment; + shared_ptr<PhraseBuilder> phrase_builder; + shared_ptr<RuleExtractorHelper> helper; + shared_ptr<Vocabulary> vocabulary; + int max_rule_span; + bool require_tight_phrases; +}; + +} // namespace extractor + +#endif |