diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-02-14 23:17:15 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-02-14 23:17:15 +0000 |
commit | 9a026ba2db8fa7723374109e6a4a8dcaff8733cd (patch) | |
tree | 34a60703a53ada76e7213da5940e86d6f476f1e4 /extractor/rule_extractor.h | |
parent | 252fb164c208ec8f3005f8a652eb3b48c0644e3d (diff) |
Working version of the grammar extractor.
Diffstat (limited to 'extractor/rule_extractor.h')
-rw-r--r-- | extractor/rule_extractor.h | 92 |
1 files changed, 33 insertions, 59 deletions
```diff
diff --git a/extractor/rule_extractor.h b/extractor/rule_extractor.h
index f668de24..a087dc6d 100644
--- a/extractor/rule_extractor.h
+++ b/extractor/rule_extractor.h
@@ -2,6 +2,7 @@
 #define _RULE_EXTRACTOR_H_
 
 #include <memory>
+#include <unordered_map>
 #include <vector>
 
 #include "phrase.h"
@@ -13,8 +14,9 @@ class DataArray;
 class PhraseBuilder;
 class PhraseLocation;
 class Rule;
+class RuleExtractorHelper;
 class Scorer;
-class Vocabulary;
+class TargetPhraseExtractor;
 
 typedef vector<pair<int, int> > PhraseAlignment;
 
@@ -46,84 +48,56 @@ class RuleExtractor {
                 bool require_aligned_chunks,
                 bool require_tight_phrases);
 
-  vector<Rule> ExtractRules(const Phrase& phrase,
-                            const PhraseLocation& location) const;
+  // For testing only.
+  RuleExtractor(shared_ptr<DataArray> source_data_array,
+                shared_ptr<PhraseBuilder> phrase_builder,
+                shared_ptr<Scorer> scorer,
+                shared_ptr<TargetPhraseExtractor> target_phrase_extractor,
+                shared_ptr<RuleExtractorHelper> helper,
+                int max_rule_span,
+                int min_gap_size,
+                int max_nonterminals,
+                int max_rule_symbols,
+                bool require_tight_phrases);
+
+  virtual ~RuleExtractor();
+
+  virtual vector<Rule> ExtractRules(const Phrase& phrase,
+                                    const PhraseLocation& location) const;
+
+ protected:
+  RuleExtractor();
 
  private:
   vector<Extract> ExtractAlignments(const Phrase& phrase,
                                     const vector<int>& matching) const;
 
-  void GetLinksSpans(vector<int>& source_low, vector<int>& source_high,
-                     vector<int>& target_low, vector<int>& target_high,
-                     int sentence_id) const;
-
-  bool CheckAlignedTerminals(const vector<int>& matching,
-                             const vector<int>& chunklen,
-                             const vector<int>& source_low) const;
-
-  bool CheckTightPhrases(const vector<int>& matching,
-                         const vector<int>& chunklen,
-                         const vector<int>& source_low) const;
-
-  bool FindFixPoint(
-      int source_phrase_start, int source_phrase_end,
-      const vector<int>& source_low, const vector<int>& source_high,
-      int& target_phrase_start, int& target_phrase_end,
-      const vector<int>& target_low, const vector<int>& target_high,
-      int& source_back_low, int& source_back_high, int sentence_id,
-      int min_source_gap_size, int min_target_gap_size,
-      int max_new_x, int max_low_x, int max_high_x,
-      bool allow_arbitrary_expansion) const;
-
-  void FindProjection(
-      int source_phrase_start, int source_phrase_end,
-      const vector<int>& source_low, const vector<int>& source_high,
-      int& target_phrase_low, int& target_phrase_end) const;
-
-  bool CheckGaps(
-      vector<pair<int, int> >& source_gaps, vector<pair<int, int> >& target_gaps,
-      const vector<int>& matching, const vector<int>& chunklen,
-      const vector<int>& source_low, const vector<int>& source_high,
-      const vector<int>& target_low, const vector<int>& target_high,
-      int source_phrase_low, int source_phrase_high, int source_back_low,
-      int source_back_high, int& num_symbols, bool& met_constraints) const;
-
   void AddExtracts(
       vector<Extract>& extracts, const Phrase& source_phrase,
+      const unordered_map<int, int>& source_indexes,
       const vector<pair<int, int> >& target_gaps, const vector<int>& target_low,
       int target_phrase_low, int target_phrase_high, int sentence_id) const;
 
-  vector<pair<Phrase, PhraseAlignment> > ExtractTargetPhrases(
-      const vector<pair<int, int> >& target_gaps, const vector<int>& target_low,
-      int target_phrase_low, int target_phrase_high, int sentence_id) const;
-
-  void GeneratePhrases(
-      vector<pair<Phrase, PhraseAlignment> >& target_phrases,
-      const vector<pair<int, int> >& ranges, int index,
-      vector<int>& subpatterns, const vector<int>& target_gap_order,
-      int target_phrase_low, int target_phrase_high, int sentence_id) const;
-
   void AddNonterminalExtremities(
-      vector<Extract>& extracts, const Phrase& source_phrase,
-      int source_phrase_low, int source_phrase_high, int source_back_low,
-      int source_back_high, const vector<int>& source_low,
+      vector<Extract>& extracts, const vector<int>& matching,
+      const vector<int>& chunklen, const Phrase& source_phrase,
+      int source_back_low, int source_back_high, const vector<int>& source_low,
       const vector<int>& source_high, const vector<int>& target_low,
-      const vector<int>& target_high,
-      const vector<pair<int, int> >& target_gaps, int sentence_id,
-      int extend_left, int extend_right) const;
+      const vector<int>& target_high, vector<pair<int, int> > target_gaps,
+      int sentence_id, int starts_with_x, int ends_with_x, int extend_left,
+      int extend_right) const;
 
-  shared_ptr<DataArray> source_data_array;
+ private:
   shared_ptr<DataArray> target_data_array;
-  shared_ptr<Alignment> alignment;
+  shared_ptr<DataArray> source_data_array;
   shared_ptr<PhraseBuilder> phrase_builder;
   shared_ptr<Scorer> scorer;
-  shared_ptr<Vocabulary> vocabulary;
+  shared_ptr<TargetPhraseExtractor> target_phrase_extractor;
+  shared_ptr<RuleExtractorHelper> helper;
   int max_rule_span;
   int min_gap_size;
   int max_nonterminals;
   int max_rule_symbols;
-  bool require_aligned_terminal;
-  bool require_aligned_chunks;
   bool require_tight_phrases;
 };
 
```