diff options
author | Patrick Simianer <p@simianer.de> | 2013-12-04 20:13:07 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-12-04 20:13:07 +0100 |
commit | 9ff43d7c8e076aaa8790bacbd4b2cfe636a55a97 (patch) | |
tree | e1e0265b18ffc854f24209cb36b2c836100f099b /extractor/grammar_extractor.cc | |
parent | e59cdac5253df7ab57296d347245d1a8f4d8b287 (diff) | |
parent | 407b100cd3e4ae987504b53101151fba287ad999 (diff) |
fix merge conflict
Diffstat (limited to 'extractor/grammar_extractor.cc')
-rw-r--r-- | extractor/grammar_extractor.cc | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc index 487abcaf..1dc94c25 100644 --- a/extractor/grammar_extractor.cc +++ b/extractor/grammar_extractor.cc @@ -19,10 +19,11 @@ GrammarExtractor::GrammarExtractor( shared_ptr<SuffixArray> source_suffix_array, shared_ptr<DataArray> target_data_array, shared_ptr<Alignment> alignment, shared_ptr<Precomputation> precomputation, - shared_ptr<Scorer> scorer, int min_gap_size, int max_rule_span, + shared_ptr<Scorer> scorer, shared_ptr<Vocabulary> vocabulary, + int min_gap_size, int max_rule_span, int max_nonterminals, int max_rule_symbols, int max_samples, bool require_tight_phrases) : - vocabulary(make_shared<Vocabulary>()), + vocabulary(vocabulary), rule_factory(make_shared<HieroCachingRuleFactory>( source_suffix_array, target_data_array, alignment, vocabulary, precomputation, scorer, min_gap_size, max_rule_span, max_nonterminals, @@ -34,10 +35,12 @@ GrammarExtractor::GrammarExtractor( vocabulary(vocabulary), rule_factory(rule_factory) {} -Grammar GrammarExtractor::GetGrammar(const string& sentence, const unordered_set<int>& blacklisted_sentence_ids, const shared_ptr<DataArray> source_data_array) { +Grammar GrammarExtractor::GetGrammar( + const string& sentence, + const unordered_set<int>& blacklisted_sentence_ids) { vector<string> words = TokenizeSentence(sentence); vector<int> word_ids = AnnotateWords(words); - return rule_factory->GetGrammar(word_ids, blacklisted_sentence_ids, source_data_array); + return rule_factory->GetGrammar(word_ids, blacklisted_sentence_ids); } vector<string> GrammarExtractor::TokenizeSentence(const string& sentence) { |