Second working commit.

author: Paul Baltescu <pauldb89@gmail.com> 2013-02-01 16:11:10 +0000
committer: Paul Baltescu <pauldb89@gmail.com> 2013-02-01 16:11:10 +0000
commit: 0a53f7eca74c165b5ce1c238f1999ddf1febea55 (patch)
tree: 5a5231767bc2f92203711ab4aee75336b8bc2175 /extractor/grammar_extractor.cc
parent: 5530575ae0ad939e17f08d6bd49978acea388ab7 (diff)
1 files changed, 13 insertions, 7 deletions
diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc
index 3014c2e9..15268165 100644
--- a/extractor/grammar_extractor.cc
+++ b/extractor/grammar_extractor.cc
@@ -4,6 +4,10 @@
 #include <sstream>
 #include <vector>
 
+#include "grammar.h"
+#include "rule.h"
+#include "vocabulary.h"
+
 using namespace std;
 
 vector<string> Tokenize(const string& sentence) {
@@ -22,18 +26,20 @@ vector<string> Tokenize(const string& sentence) {
 GrammarExtractor::GrammarExtractor(
     shared_ptr<SuffixArray> source_suffix_array,
     shared_ptr<DataArray> target_data_array,
-    const Alignment& alignment, const Precomputation& precomputation,
-    int min_gap_size, int max_rule_span, int max_nonterminals,
-    int max_rule_symbols, bool use_baeza_yates) :
+    shared_ptr<Alignment> alignment, shared_ptr<Precomputation> precomputation,
+    shared_ptr<Scorer> scorer, int min_gap_size, int max_rule_span,
+    int max_nonterminals, int max_rule_symbols, int max_samples,
+    bool use_baeza_yates, bool require_tight_phrases) :
     vocabulary(make_shared<Vocabulary>()),
     rule_factory(source_suffix_array, target_data_array, alignment,
-        vocabulary, precomputation, min_gap_size, max_rule_span,
-        max_nonterminals, max_rule_symbols, use_baeza_yates) {}
+        vocabulary, precomputation, scorer, min_gap_size, max_rule_span,
+        max_nonterminals, max_rule_symbols, max_samples, use_baeza_yates,
+        require_tight_phrases) {}
 
-void GrammarExtractor::GetGrammar(const string& sentence) {
+Grammar GrammarExtractor::GetGrammar(const string& sentence) {
   vector<string> words = Tokenize(sentence);
   vector<int> word_ids = AnnotateWords(words);
-  rule_factory.GetGrammar(word_ids);
+  return rule_factory.GetGrammar(word_ids);
 }
 
 vector<int> GrammarExtractor::AnnotateWords(const vector<string>& words) {
author	Paul Baltescu <pauldb89@gmail.com>	2013-02-01 16:11:10 +0000
committer	Paul Baltescu <pauldb89@gmail.com>	2013-02-01 16:11:10 +0000
commit	0a53f7eca74c165b5ce1c238f1999ddf1febea55 (patch)
tree	5a5231767bc2f92203711ab4aee75336b8bc2175 /extractor/grammar_extractor.cc
parent	5530575ae0ad939e17f08d6bd49978acea388ab7 (diff)