#ifndef _GRAMMAR_EXTRACTOR_H_
#define _GRAMMAR_EXTRACTOR_H_

#include <memory>
#include <string>
#include <vector>

// NOTE(review): a using-directive at header scope leaks into every file that
// includes this header. It is kept because the declarations below use the
// unqualified names and other files may have come to depend on it.
using namespace std;

namespace extractor {

// Forward declarations: this header only names these types through
// shared_ptr or in function declarations, so their full definitions are not
// required here.
class Alignment;
class DataArray;
class Grammar;
class HieroCachingRuleFactory;
class Precomputation;
class Rule;
class Scorer;
class SuffixArray;
class Vocabulary;

/**
 * Class wrapping all the logic for extracting the synchronous context free
 * grammars (SCFGs).
 *
 * The object holds a vocabulary and a rule factory; GetGrammar() is the single
 * public entry point.
 */
class GrammarExtractor {
 public:
  // Builds an extractor from the shared data structures and the extraction
  // limits.
  //
  // NOTE(review): the exact meaning of each limit (min_gap_size,
  // max_rule_span, max_nonterminals, max_rule_symbols, max_samples,
  // require_tight_phrases) is defined by the implementation, which is not
  // visible from this header -- confirm there before relying on the names.
  GrammarExtractor(
      shared_ptr<SuffixArray> source_suffix_array,
      shared_ptr<DataArray> target_data_array,
      shared_ptr<Alignment> alignment,
      shared_ptr<Precomputation> precomputation,
      shared_ptr<Scorer> scorer,
      int min_gap_size,
      int max_rule_span,
      int max_nonterminals,
      int max_rule_symbols,
      int max_samples,
      bool require_tight_phrases);

  // For testing only: takes the two collaborators directly.
  GrammarExtractor(shared_ptr<Vocabulary> vocabulary,
                   shared_ptr<HieroCachingRuleFactory> rule_factory);

  // Converts the sentence to a vector of word ids and uses the RuleFactory to
  // extract the SCFG rules which may be used to decode the sentence.
  Grammar GetGrammar(const string& sentence);

 private:
  // Splits the sentence into a vector of words.
  vector<string> TokenizeSentence(const string& sentence);

  // Maps the words to word ids.
  // NOTE(review): the id type is assumed to be int -- confirm against the
  // implementation file.
  vector<int> AnnotateWords(const vector<string>& words);

  shared_ptr<Vocabulary> vocabulary;
  shared_ptr<HieroCachingRuleFactory> rule_factory;
};

} // namespace extractor

#endif