summary | refs | log | tree | commit | diff
path: root/extractor/rule_extractor.h
diff options
context:
space:
mode:
Diffstat (limited to 'extractor/rule_extractor.h')
-rw-r--r-- extractor/rule_extractor.h 92
1 file changed, 33 insertions, 59 deletions
diff --git a/extractor/rule_extractor.h b/extractor/rule_extractor.h
index f668de24..a087dc6d 100644
--- a/extractor/rule_extractor.h
+++ b/extractor/rule_extractor.h
@@ -2,6 +2,7 @@
#define _RULE_EXTRACTOR_H_
#include <memory>
+#include <unordered_map>
#include <vector>
#include "phrase.h"
@@ -13,8 +14,9 @@ class DataArray;
class PhraseBuilder;
class PhraseLocation;
class Rule;
+class RuleExtractorHelper;
class Scorer;
-class Vocabulary;
+class TargetPhraseExtractor;
typedef vector<pair<int, int> > PhraseAlignment;
@@ -46,84 +48,56 @@ class RuleExtractor {
bool require_aligned_chunks,
bool require_tight_phrases);
- vector<Rule> ExtractRules(const Phrase& phrase,
- const PhraseLocation& location) const;
+ // For testing only.
+ RuleExtractor(shared_ptr<DataArray> source_data_array,
+ shared_ptr<PhraseBuilder> phrase_builder,
+ shared_ptr<Scorer> scorer,
+ shared_ptr<TargetPhraseExtractor> target_phrase_extractor,
+ shared_ptr<RuleExtractorHelper> helper,
+ int max_rule_span,
+ int min_gap_size,
+ int max_nonterminals,
+ int max_rule_symbols,
+ bool require_tight_phrases);
+
+ virtual ~RuleExtractor();
+
+ virtual vector<Rule> ExtractRules(const Phrase& phrase,
+ const PhraseLocation& location) const;
+
+ protected:
+ RuleExtractor();
private:
vector<Extract> ExtractAlignments(const Phrase& phrase,
const vector<int>& matching) const;
- void GetLinksSpans(vector<int>& source_low, vector<int>& source_high,
- vector<int>& target_low, vector<int>& target_high,
- int sentence_id) const;
-
- bool CheckAlignedTerminals(const vector<int>& matching,
- const vector<int>& chunklen,
- const vector<int>& source_low) const;
-
- bool CheckTightPhrases(const vector<int>& matching,
- const vector<int>& chunklen,
- const vector<int>& source_low) const;
-
- bool FindFixPoint(
- int source_phrase_start, int source_phrase_end,
- const vector<int>& source_low, const vector<int>& source_high,
- int& target_phrase_start, int& target_phrase_end,
- const vector<int>& target_low, const vector<int>& target_high,
- int& source_back_low, int& source_back_high, int sentence_id,
- int min_source_gap_size, int min_target_gap_size,
- int max_new_x, int max_low_x, int max_high_x,
- bool allow_arbitrary_expansion) const;
-
- void FindProjection(
- int source_phrase_start, int source_phrase_end,
- const vector<int>& source_low, const vector<int>& source_high,
- int& target_phrase_low, int& target_phrase_end) const;
-
- bool CheckGaps(
- vector<pair<int, int> >& source_gaps, vector<pair<int, int> >& target_gaps,
- const vector<int>& matching, const vector<int>& chunklen,
- const vector<int>& source_low, const vector<int>& source_high,
- const vector<int>& target_low, const vector<int>& target_high,
- int source_phrase_low, int source_phrase_high, int source_back_low,
- int source_back_high, int& num_symbols, bool& met_constraints) const;
-
void AddExtracts(
vector<Extract>& extracts, const Phrase& source_phrase,
+ const unordered_map<int, int>& source_indexes,
const vector<pair<int, int> >& target_gaps, const vector<int>& target_low,
int target_phrase_low, int target_phrase_high, int sentence_id) const;
- vector<pair<Phrase, PhraseAlignment> > ExtractTargetPhrases(
- const vector<pair<int, int> >& target_gaps, const vector<int>& target_low,
- int target_phrase_low, int target_phrase_high, int sentence_id) const;
-
- void GeneratePhrases(
- vector<pair<Phrase, PhraseAlignment> >& target_phrases,
- const vector<pair<int, int> >& ranges, int index,
- vector<int>& subpatterns, const vector<int>& target_gap_order,
- int target_phrase_low, int target_phrase_high, int sentence_id) const;
-
void AddNonterminalExtremities(
- vector<Extract>& extracts, const Phrase& source_phrase,
- int source_phrase_low, int source_phrase_high, int source_back_low,
- int source_back_high, const vector<int>& source_low,
+ vector<Extract>& extracts, const vector<int>& matching,
+ const vector<int>& chunklen, const Phrase& source_phrase,
+ int source_back_low, int source_back_high, const vector<int>& source_low,
const vector<int>& source_high, const vector<int>& target_low,
- const vector<int>& target_high,
- const vector<pair<int, int> >& target_gaps, int sentence_id,
- int extend_left, int extend_right) const;
+ const vector<int>& target_high, vector<pair<int, int> > target_gaps,
+ int sentence_id, int starts_with_x, int ends_with_x, int extend_left,
+ int extend_right) const;
- shared_ptr<DataArray> source_data_array;
+ private:
shared_ptr<DataArray> target_data_array;
- shared_ptr<Alignment> alignment;
+ shared_ptr<DataArray> source_data_array;
shared_ptr<PhraseBuilder> phrase_builder;
shared_ptr<Scorer> scorer;
- shared_ptr<Vocabulary> vocabulary;
+ shared_ptr<TargetPhraseExtractor> target_phrase_extractor;
+ shared_ptr<RuleExtractorHelper> helper;
int max_rule_span;
int min_gap_size;
int max_nonterminals;
int max_rule_symbols;
- bool require_aligned_terminal;
- bool require_aligned_chunks;
bool require_tight_phrases;
};