summaryrefslogtreecommitdiff
path: root/extractor/grammar_extractor.cc
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-02-01 16:11:10 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-02-01 16:11:10 +0000
commit0a53f7eca74c165b5ce1c238f1999ddf1febea55 (patch)
tree5a5231767bc2f92203711ab4aee75336b8bc2175 /extractor/grammar_extractor.cc
parent5530575ae0ad939e17f08d6bd49978acea388ab7 (diff)
Second working commit.
Diffstat (limited to 'extractor/grammar_extractor.cc')
-rw-r--r--extractor/grammar_extractor.cc20
1 file changed, 13 insertions, 7 deletions
diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc
index 3014c2e9..15268165 100644
--- a/extractor/grammar_extractor.cc
+++ b/extractor/grammar_extractor.cc
@@ -4,6 +4,10 @@
#include <sstream>
#include <vector>
+#include "grammar.h"
+#include "rule.h"
+#include "vocabulary.h"
+
using namespace std;
vector<string> Tokenize(const string& sentence) {
@@ -22,18 +26,20 @@ vector<string> Tokenize(const string& sentence) {
GrammarExtractor::GrammarExtractor(
shared_ptr<SuffixArray> source_suffix_array,
shared_ptr<DataArray> target_data_array,
- const Alignment& alignment, const Precomputation& precomputation,
- int min_gap_size, int max_rule_span, int max_nonterminals,
- int max_rule_symbols, bool use_baeza_yates) :
+ shared_ptr<Alignment> alignment, shared_ptr<Precomputation> precomputation,
+ shared_ptr<Scorer> scorer, int min_gap_size, int max_rule_span,
+ int max_nonterminals, int max_rule_symbols, int max_samples,
+ bool use_baeza_yates, bool require_tight_phrases) :
vocabulary(make_shared<Vocabulary>()),
rule_factory(source_suffix_array, target_data_array, alignment,
- vocabulary, precomputation, min_gap_size, max_rule_span,
- max_nonterminals, max_rule_symbols, use_baeza_yates) {}
+ vocabulary, precomputation, scorer, min_gap_size, max_rule_span,
+ max_nonterminals, max_rule_symbols, max_samples, use_baeza_yates,
+ require_tight_phrases) {}
-void GrammarExtractor::GetGrammar(const string& sentence) {
+Grammar GrammarExtractor::GetGrammar(const string& sentence) {
vector<string> words = Tokenize(sentence);
vector<int> word_ids = AnnotateWords(words);
- rule_factory.GetGrammar(word_ids);
+ return rule_factory.GetGrammar(word_ids);
}
vector<int> GrammarExtractor::AnnotateWords(const vector<string>& words) {