summaryrefslogtreecommitdiff
path: root/extractor/target_phrase_extractor.h
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2013-05-02 09:09:59 +0200
committerPatrick Simianer <p@simianer.de>2013-05-02 09:09:59 +0200
commit0ce66778da6079506896739e9d97dc7dff83cd72 (patch)
treef435457bb23dab0c566c9896f9d38cece9d15885 /extractor/target_phrase_extractor.h
parentb6754386f1109b960b05cdf2eabbc97bdd38e8df (diff)
parentb7ea2615bc9bb69031ff714ddce1539c9f1bda2d (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'extractor/target_phrase_extractor.h')
-rw-r--r--extractor/target_phrase_extractor.h64
1 files changed, 64 insertions, 0 deletions
diff --git a/extractor/target_phrase_extractor.h b/extractor/target_phrase_extractor.h
new file mode 100644
index 00000000..289bae2f
--- /dev/null
+++ b/extractor/target_phrase_extractor.h
@@ -0,0 +1,64 @@
+#ifndef _TARGET_PHRASE_EXTRACTOR_H_
+#define _TARGET_PHRASE_EXTRACTOR_H_
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+using namespace std;
+
+namespace extractor {
+
+typedef vector<pair<int, int> > PhraseAlignment;
+
+class Alignment;
+class DataArray;
+class Phrase;
+class PhraseBuilder;
+class RuleExtractorHelper;
+class Vocabulary;
+
+class TargetPhraseExtractor {
+ public:
+ TargetPhraseExtractor(shared_ptr<DataArray> target_data_array,
+ shared_ptr<Alignment> alignment,
+ shared_ptr<PhraseBuilder> phrase_builder,
+ shared_ptr<RuleExtractorHelper> helper,
+ shared_ptr<Vocabulary> vocabulary,
+ int max_rule_span,
+ bool require_tight_phrases);
+
+ virtual ~TargetPhraseExtractor();
+
+ // Finds all the target phrases that can extracted from a span in the
+ // target sentence (matching the given set of target phrase gaps).
+ virtual vector<pair<Phrase, PhraseAlignment> > ExtractPhrases(
+ const vector<pair<int, int> >& target_gaps, const vector<int>& target_low,
+ int target_phrase_low, int target_phrase_high,
+ const unordered_map<int, int>& source_indexes, int sentence_id) const;
+
+ protected:
+ TargetPhraseExtractor();
+
+ private:
+ // Computes the cartesian product over the sets of possible target phrase
+ // chunks.
+ void GeneratePhrases(
+ vector<pair<Phrase, PhraseAlignment> >& target_phrases,
+ const vector<pair<int, int> >& ranges, int index,
+ vector<int>& subpatterns, const vector<int>& target_gap_order,
+ int target_phrase_low, int target_phrase_high,
+ const unordered_map<int, int>& source_indexes, int sentence_id) const;
+
+ shared_ptr<DataArray> target_data_array;
+ shared_ptr<Alignment> alignment;
+ shared_ptr<PhraseBuilder> phrase_builder;
+ shared_ptr<RuleExtractorHelper> helper;
+ shared_ptr<Vocabulary> vocabulary;
+ int max_rule_span;
+ bool require_tight_phrases;
+};
+
+} // namespace extractor
+
+#endif