summary | refs | log | tree | commit | diff
path: root/extractor/target_phrase_extractor.h
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-04-23 19:35:18 -0400
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-04-23 19:35:18 -0400
commit 6d347f1ce078dede3da0e1498f75e357351c6543 (patch)
tree 8e872b8747c530e741e55e25e9917c1bd8b32c5b /extractor/target_phrase_extractor.h
parent d11b76def6899790161c47a73018146311356d8b (diff)
parent 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff)
merge paul's extractor code
Diffstat (limited to 'extractor/target_phrase_extractor.h')
-rw-r--r-- extractor/target_phrase_extractor.h 64
1 files changed, 64 insertions, 0 deletions
diff --git a/extractor/target_phrase_extractor.h b/extractor/target_phrase_extractor.h
new file mode 100644
index 00000000..289bae2f
--- /dev/null
+++ b/extractor/target_phrase_extractor.h
@@ -0,0 +1,64 @@
+#ifndef _TARGET_PHRASE_EXTRACTOR_H_
+#define _TARGET_PHRASE_EXTRACTOR_H_
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+using namespace std;  // NOTE(review): header-scope using-directive; it leaks every std name into each file that includes this header.
+
+namespace extractor {
+
+typedef vector<pair<int, int> > PhraseAlignment;  // List of (int, int) pairs; NOTE(review): presumably aligned (source, target) word positions — confirm.
+
+class Alignment;  // Forward declarations: these types are defined elsewhere and are only named, not defined, in this header.
+class DataArray;
+class Phrase;
+class PhraseBuilder;
+class RuleExtractorHelper;
+class Vocabulary;
+
+class TargetPhraseExtractor {  // Extracts target-side phrases for a span of a target sentence (see ExtractPhrases).
+ public:
+ TargetPhraseExtractor(shared_ptr<DataArray> target_data_array,  // Each argument has a same-named data member below; NOTE(review): presumably stored as-is — confirm in the .cc.
+ shared_ptr<Alignment> alignment,
+ shared_ptr<PhraseBuilder> phrase_builder,
+ shared_ptr<RuleExtractorHelper> helper,
+ shared_ptr<Vocabulary> vocabulary,
+ int max_rule_span,
+ bool require_tight_phrases);
+
+ virtual ~TargetPhraseExtractor();  // Virtual so that subclasses are destroyed correctly through a base pointer.
+
+ // Finds all the target phrases that can be extracted from a span in the
+ // target sentence (matching the given set of target phrase gaps).
+ virtual vector<pair<Phrase, PhraseAlignment> > ExtractPhrases(  // Returns one (phrase, alignment) pair per extracted target phrase.
+ const vector<pair<int, int> >& target_gaps, const vector<int>& target_low,
+ int target_phrase_low, int target_phrase_high,
+ const unordered_map<int, int>& source_indexes, int sentence_id) const;
+
+ protected:
+ TargetPhraseExtractor();  // NOTE(review): presumably exists for subclasses such as test mocks — confirm.
+
+ private:
+ // Computes the cartesian product over the sets of possible target phrase
+ // chunks.
+ void GeneratePhrases(
+ vector<pair<Phrase, PhraseAlignment> >& target_phrases,  // Non-const reference; NOTE(review): presumably receives the generated phrases — confirm.
+ const vector<pair<int, int> >& ranges, int index,
+ vector<int>& subpatterns, const vector<int>& target_gap_order,
+ int target_phrase_low, int target_phrase_high,
+ const unordered_map<int, int>& source_indexes, int sentence_id) const;
+
+ shared_ptr<DataArray> target_data_array;  // Shared-ownership handles to the collaborators passed to the public constructor.
+ shared_ptr<Alignment> alignment;
+ shared_ptr<PhraseBuilder> phrase_builder;
+ shared_ptr<RuleExtractorHelper> helper;
+ shared_ptr<Vocabulary> vocabulary;
+ int max_rule_span;
+ bool require_tight_phrases;
+};
+
+} // namespace extractor
+
+#endif