diff options
Diffstat (limited to 'extractor/grammar_extractor.cc')
-rw-r--r-- | extractor/grammar_extractor.cc | 20 |
1 file changed, 13 insertions, 7 deletions
diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc
index 3014c2e9..15268165 100644
--- a/extractor/grammar_extractor.cc
+++ b/extractor/grammar_extractor.cc
@@ -4,6 +4,10 @@
 #include <sstream>
 #include <vector>
 
+#include "grammar.h"
+#include "rule.h"
+#include "vocabulary.h"
+
 using namespace std;
 
 vector<string> Tokenize(const string& sentence) {
@@ -22,18 +26,20 @@ vector<string> Tokenize(const string& sentence) {
 GrammarExtractor::GrammarExtractor(
     shared_ptr<SuffixArray> source_suffix_array,
     shared_ptr<DataArray> target_data_array,
-    const Alignment& alignment, const Precomputation& precomputation,
-    int min_gap_size, int max_rule_span, int max_nonterminals,
-    int max_rule_symbols, bool use_baeza_yates) :
+    shared_ptr<Alignment> alignment, shared_ptr<Precomputation> precomputation,
+    shared_ptr<Scorer> scorer, int min_gap_size, int max_rule_span,
+    int max_nonterminals, int max_rule_symbols, int max_samples,
+    bool use_baeza_yates, bool require_tight_phrases) :
     vocabulary(make_shared<Vocabulary>()),
     rule_factory(source_suffix_array, target_data_array, alignment,
-        vocabulary, precomputation, min_gap_size, max_rule_span,
-        max_nonterminals, max_rule_symbols, use_baeza_yates) {}
+        vocabulary, precomputation, scorer, min_gap_size, max_rule_span,
+        max_nonterminals, max_rule_symbols, max_samples, use_baeza_yates,
+        require_tight_phrases) {}
 
-void GrammarExtractor::GetGrammar(const string& sentence) {
+Grammar GrammarExtractor::GetGrammar(const string& sentence) {
   vector<string> words = Tokenize(sentence);
   vector<int> word_ids = AnnotateWords(words);
-  rule_factory.GetGrammar(word_ids);
+  return rule_factory.GetGrammar(word_ids);
 }
 
 vector<int> GrammarExtractor::AnnotateWords(const vector<string>& words) {