From 9a026ba2db8fa7723374109e6a4a8dcaff8733cd Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Thu, 14 Feb 2013 23:17:15 +0000 Subject: Working version of the grammar extractor. --- extractor/rule_extractor_helper.h | 82 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 extractor/rule_extractor_helper.h (limited to 'extractor/rule_extractor_helper.h') diff --git a/extractor/rule_extractor_helper.h b/extractor/rule_extractor_helper.h new file mode 100644 index 00000000..3478bfc8 --- /dev/null +++ b/extractor/rule_extractor_helper.h @@ -0,0 +1,82 @@ +#ifndef _RULE_EXTRACTOR_HELPER_H_ +#define _RULE_EXTRACTOR_HELPER_H_ + +#include +#include +#include + +using namespace std; + +class Alignment; +class DataArray; + +class RuleExtractorHelper { + public: + RuleExtractorHelper(shared_ptr source_data_array, + shared_ptr target_data_array, + shared_ptr alignment, + int max_rule_span, + int max_rule_symbols, + bool require_aligned_terminal, + bool require_aligned_chunks, + bool require_tight_phrases); + + virtual ~RuleExtractorHelper(); + + virtual void GetLinksSpans(vector& source_low, vector& source_high, + vector& target_low, vector& target_high, + int sentence_id) const; + + virtual bool CheckAlignedTerminals(const vector& matching, + const vector& chunklen, + const vector& source_low) const; + + virtual bool CheckTightPhrases(const vector& matching, + const vector& chunklen, + const vector& source_low) const; + + virtual bool FindFixPoint( + int source_phrase_low, int source_phrase_high, + const vector& source_low, const vector& source_high, + int& target_phrase_low, int& target_phrase_high, + const vector& target_low, const vector& target_high, + int& source_back_low, int& source_back_high, int sentence_id, + int min_source_gap_size, int min_target_gap_size, + int max_new_x, bool allow_low_x, bool allow_high_x, + bool allow_arbitrary_expansion) const; + + virtual bool GetGaps( + vector >& source_gaps, vector >& target_gaps, + const vector& matching, const vector& chunklen, + const vector& source_low, const vector& source_high, + const vector& target_low, const vector& target_high, + int source_phrase_low, int source_phrase_high, int source_back_low, + int source_back_high, int& num_symbols, bool& met_constraints) const; + + virtual vector GetGapOrder(const vector >& gaps) const; + + // TODO(pauldb): Add unit tests. + virtual unordered_map GetSourceIndexes( + const vector& matching, const vector& chunklen, + int starts_with_x) const; + + protected: + RuleExtractorHelper(); + + private: + void FindProjection( + int source_phrase_low, int source_phrase_high, + const vector& source_low, const vector& source_high, + int& target_phrase_low, int& target_phrase_high) const; + + shared_ptr source_data_array; + shared_ptr target_data_array; + shared_ptr alignment; + int max_rule_span; + int max_rule_symbols; + bool require_aligned_terminal; + bool require_aligned_chunks; + bool require_tight_phrases; +}; + +#endif -- cgit v1.2.3