From 0a53f7eca74c165b5ce1c238f1999ddf1febea55 Mon Sep 17 00:00:00 2001
From: Paul Baltescu
Date: Fri, 1 Feb 2013 16:11:10 +0000
Subject: Second working commit.

---
 extractor/grammar_extractor.cc | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'extractor/grammar_extractor.cc')

diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc
index 3014c2e9..15268165 100644
--- a/extractor/grammar_extractor.cc
+++ b/extractor/grammar_extractor.cc
@@ -4,6 +4,10 @@
 #include
 #include
 
+#include "grammar.h"
+#include "rule.h"
+#include "vocabulary.h"
+
 using namespace std;
 
 vector Tokenize(const string& sentence) {
@@ -22,18 +26,20 @@ vector Tokenize(const string& sentence) {
 GrammarExtractor::GrammarExtractor(
     shared_ptr source_suffix_array,
     shared_ptr target_data_array,
-    const Alignment& alignment, const Precomputation& precomputation,
-    int min_gap_size, int max_rule_span, int max_nonterminals,
-    int max_rule_symbols, bool use_baeza_yates) :
+    shared_ptr alignment, shared_ptr precomputation,
+    shared_ptr scorer, int min_gap_size, int max_rule_span,
+    int max_nonterminals, int max_rule_symbols, int max_samples,
+    bool use_baeza_yates, bool require_tight_phrases) :
     vocabulary(make_shared()),
     rule_factory(source_suffix_array, target_data_array, alignment,
-        vocabulary, precomputation, min_gap_size, max_rule_span,
-        max_nonterminals, max_rule_symbols, use_baeza_yates) {}
+        vocabulary, precomputation, scorer, min_gap_size, max_rule_span,
+        max_nonterminals, max_rule_symbols, max_samples, use_baeza_yates,
+        require_tight_phrases) {}
 
-void GrammarExtractor::GetGrammar(const string& sentence) {
+Grammar GrammarExtractor::GetGrammar(const string& sentence) {
   vector words = Tokenize(sentence);
   vector word_ids = AnnotateWords(words);
-  rule_factory.GetGrammar(word_ids);
+  return rule_factory.GetGrammar(word_ids);
 }
 
 vector GrammarExtractor::AnnotateWords(const vector& words) {
-- 
cgit v1.2.3