From 9a026ba2db8fa7723374109e6a4a8dcaff8733cd Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Thu, 14 Feb 2013 23:17:15 +0000 Subject: Working version of the grammar extractor. --- extractor/rule_extractor.h | 92 +++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 59 deletions(-) (limited to 'extractor/rule_extractor.h') diff --git a/extractor/rule_extractor.h b/extractor/rule_extractor.h index f668de24..a087dc6d 100644 --- a/extractor/rule_extractor.h +++ b/extractor/rule_extractor.h @@ -2,6 +2,7 @@ #define _RULE_EXTRACTOR_H_ #include +#include #include #include "phrase.h" @@ -13,8 +14,9 @@ class DataArray; class PhraseBuilder; class PhraseLocation; class Rule; +class RuleExtractorHelper; class Scorer; -class Vocabulary; +class TargetPhraseExtractor; typedef vector > PhraseAlignment; @@ -46,84 +48,56 @@ class RuleExtractor { bool require_aligned_chunks, bool require_tight_phrases); - vector ExtractRules(const Phrase& phrase, - const PhraseLocation& location) const; + // For testing only. + RuleExtractor(shared_ptr source_data_array, + shared_ptr phrase_builder, + shared_ptr scorer, + shared_ptr target_phrase_extractor, + shared_ptr helper, + int max_rule_span, + int min_gap_size, + int max_nonterminals, + int max_rule_symbols, + bool require_tight_phrases); + + virtual ~RuleExtractor(); + + virtual vector ExtractRules(const Phrase& phrase, + const PhraseLocation& location) const; + + protected: + RuleExtractor(); private: vector ExtractAlignments(const Phrase& phrase, const vector& matching) const; - void GetLinksSpans(vector& source_low, vector& source_high, - vector& target_low, vector& target_high, - int sentence_id) const; - - bool CheckAlignedTerminals(const vector& matching, - const vector& chunklen, - const vector& source_low) const; - - bool CheckTightPhrases(const vector& matching, - const vector& chunklen, - const vector& source_low) const; - - bool FindFixPoint( - int source_phrase_start, int source_phrase_end, - const vector& source_low, const vector& source_high, - int& target_phrase_start, int& target_phrase_end, - const vector& target_low, const vector& target_high, - int& source_back_low, int& source_back_high, int sentence_id, - int min_source_gap_size, int min_target_gap_size, - int max_new_x, int max_low_x, int max_high_x, - bool allow_arbitrary_expansion) const; - - void FindProjection( - int source_phrase_start, int source_phrase_end, - const vector& source_low, const vector& source_high, - int& target_phrase_low, int& target_phrase_end) const; - - bool CheckGaps( - vector >& source_gaps, vector >& target_gaps, - const vector& matching, const vector& chunklen, - const vector& source_low, const vector& source_high, - const vector& target_low, const vector& target_high, - int source_phrase_low, int source_phrase_high, int source_back_low, - int source_back_high, int& num_symbols, bool& met_constraints) const; - void AddExtracts( vector& extracts, const Phrase& source_phrase, + const unordered_map& source_indexes, const vector >& target_gaps, const vector& target_low, int target_phrase_low, int target_phrase_high, int sentence_id) const; - vector > ExtractTargetPhrases( - const vector >& target_gaps, const vector& target_low, - int target_phrase_low, int target_phrase_high, int sentence_id) const; - - void GeneratePhrases( - vector >& target_phrases, - const vector >& ranges, int index, - vector& subpatterns, const vector& target_gap_order, - int target_phrase_low, int target_phrase_high, int sentence_id) const; - void AddNonterminalExtremities( - vector& extracts, const Phrase& source_phrase, - int source_phrase_low, int source_phrase_high, int source_back_low, - int source_back_high, const vector& source_low, + vector& extracts, const vector& matching, + const vector& chunklen, const Phrase& source_phrase, + int source_back_low, int source_back_high, const vector& source_low, const vector& source_high, const vector& target_low, - const vector& target_high, - const vector >& target_gaps, int sentence_id, - int extend_left, int extend_right) const; + const vector& target_high, vector > target_gaps, + int sentence_id, int starts_with_x, int ends_with_x, int extend_left, + int extend_right) const; - shared_ptr source_data_array; + private: shared_ptr target_data_array; - shared_ptr alignment; + shared_ptr source_data_array; shared_ptr phrase_builder; shared_ptr scorer; - shared_ptr vocabulary; + shared_ptr target_phrase_extractor; + shared_ptr helper; int max_rule_span; int min_gap_size; int max_nonterminals; int max_rule_symbols; - bool require_aligned_terminal; - bool require_aligned_chunks; bool require_tight_phrases; }; -- cgit v1.2.3