#include "grammar_extractor.h"

// NOTE(review): the header names and all template argument lists in this file
// had been stripped (everything between '<' and '>' was lost). They are
// restored below from usage; the project-declared types (SuffixArray,
// DataArray, Vocabulary) and the int id type are inferred from parameter
// names and call sites -- confirm against grammar_extractor.h.
#include <iterator>
#include <sstream>
#include <vector>

using namespace std;

// Splits `sentence` into whitespace-separated tokens and wraps the result in
// sentence-boundary markers.
//
// Returns a vector whose first element is the start marker, whose last element
// is the end marker, and whose middle elements are the tokens of `sentence` in
// order. An empty or all-whitespace sentence yields just the two markers.
vector<string> Tokenize(const string& sentence) {
  vector<string> result;
  // NOTE(review): the marker literals were empty in the damaged text, which
  // matches the same '<...>' stripping seen elsewhere; "<s>" / "</s>" are
  // presumed to be the original values -- confirm.
  result.push_back("<s>");

  // operator>> on a string skips whitespace, so consecutive separators never
  // produce empty tokens.
  istringstream buffer(sentence);
  copy(istream_iterator<string>(buffer),
       istream_iterator<string>(),
       back_inserter(result));

  result.push_back("</s>");
  return result;
}

// Creates the extractor's vocabulary and forwards every argument, together
// with that vocabulary, to the rule factory.
GrammarExtractor::GrammarExtractor(
    shared_ptr<SuffixArray> source_suffix_array,
    shared_ptr<DataArray> target_data_array,
    const Alignment& alignment,
    const Precomputation& precomputation,
    int min_gap_size,
    int max_rule_span,
    int max_nonterminals,
    int max_rule_symbols,
    bool use_baeza_yates) :
    vocabulary(make_shared<Vocabulary>()),
    rule_factory(source_suffix_array, target_data_array, alignment,
        vocabulary, precomputation, min_gap_size, max_rule_span,
        max_nonterminals, max_rule_symbols, use_baeza_yates) {}

// Tokenizes `sentence`, converts the tokens to vocabulary ids and hands the
// id sequence to the rule factory.
void GrammarExtractor::GetGrammar(const string& sentence) {
  vector<string> words = Tokenize(sentence);
  vector<int> word_ids = AnnotateWords(words);
  rule_factory.GetGrammar(word_ids);
}

// Maps each word to the id returned by the vocabulary's GetTerminalIndex,
// preserving order. The result has exactly words.size() elements.
vector<int> GrammarExtractor::AnnotateWords(const vector<string>& words) {
  vector<int> result;
  result.reserve(words.size());
  // Iterate by const reference: the lookup only reads the word, so copying
  // each string is unnecessary.
  for (const string& word : words) {
    result.push_back(vocabulary->GetTerminalIndex(word));
  }
  return result;
}