diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-02-01 16:11:10 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-02-01 16:11:10 +0000 |
commit | 0a53f7eca74c165b5ce1c238f1999ddf1febea55 (patch) | |
tree | 5a5231767bc2f92203711ab4aee75336b8bc2175 /extractor/grammar_extractor.cc | |
parent | 5530575ae0ad939e17f08d6bd49978acea388ab7 (diff) |
Second working commit.
Diffstat (limited to 'extractor/grammar_extractor.cc')
-rw-r--r-- | extractor/grammar_extractor.cc | 20 |
1 files changed, 13 insertions, 7 deletions
```diff
diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc
index 3014c2e9..15268165 100644
--- a/extractor/grammar_extractor.cc
+++ b/extractor/grammar_extractor.cc
@@ -4,6 +4,10 @@
 #include <sstream>
 #include <vector>
 
+#include "grammar.h"
+#include "rule.h"
+#include "vocabulary.h"
+
 using namespace std;
 
 vector<string> Tokenize(const string& sentence) {
@@ -22,18 +26,20 @@ vector<string> Tokenize(const string& sentence) {
 GrammarExtractor::GrammarExtractor(
     shared_ptr<SuffixArray> source_suffix_array,
     shared_ptr<DataArray> target_data_array,
-    const Alignment& alignment, const Precomputation& precomputation,
-    int min_gap_size, int max_rule_span, int max_nonterminals,
-    int max_rule_symbols, bool use_baeza_yates) :
+    shared_ptr<Alignment> alignment, shared_ptr<Precomputation> precomputation,
+    shared_ptr<Scorer> scorer, int min_gap_size, int max_rule_span,
+    int max_nonterminals, int max_rule_symbols, int max_samples,
+    bool use_baeza_yates, bool require_tight_phrases) :
     vocabulary(make_shared<Vocabulary>()),
     rule_factory(source_suffix_array, target_data_array, alignment,
-        vocabulary, precomputation, min_gap_size, max_rule_span,
-        max_nonterminals, max_rule_symbols, use_baeza_yates) {}
+        vocabulary, precomputation, scorer, min_gap_size, max_rule_span,
+        max_nonterminals, max_rule_symbols, max_samples, use_baeza_yates,
+        require_tight_phrases) {}
 
-void GrammarExtractor::GetGrammar(const string& sentence) {
+Grammar GrammarExtractor::GetGrammar(const string& sentence) {
   vector<string> words = Tokenize(sentence);
   vector<int> word_ids = AnnotateWords(words);
-  rule_factory.GetGrammar(word_ids);
+  return rule_factory.GetGrammar(word_ids);
 }
 
 vector<int> GrammarExtractor::AnnotateWords(const vector<string>& words) {
```