1 files changed, 26 insertions, 19 deletions
diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc
index 15268165..2f008026 100644
--- a/extractor/grammar_extractor.cc
+++ b/extractor/grammar_extractor.cc
@@ -10,19 +10,6 @@
 
 using namespace std;
 
-vector<string> Tokenize(const string& sentence) {
-  vector<string> result;
-  result.push_back("<s>");
-
-  istringstream buffer(sentence);
-  copy(istream_iterator<string>(buffer),
-       istream_iterator<string>(),
-       back_inserter(result));
-
-  result.push_back("</s>");
-  return result;
-}
-
 GrammarExtractor::GrammarExtractor(
     shared_ptr<SuffixArray> source_suffix_array,
     shared_ptr<DataArray> target_data_array,
@@ -31,15 +18,35 @@ GrammarExtractor::GrammarExtractor(
     int max_nonterminals, int max_rule_symbols, int max_samples,
     bool use_baeza_yates, bool require_tight_phrases) :
     vocabulary(make_shared<Vocabulary>()),
-    rule_factory(source_suffix_array, target_data_array, alignment,
-        vocabulary, precomputation, scorer, min_gap_size, max_rule_span,
-        max_nonterminals, max_rule_symbols, max_samples, use_baeza_yates,
-        require_tight_phrases) {}
+    rule_factory(make_shared<HieroCachingRuleFactory>(
+        source_suffix_array, target_data_array, alignment, vocabulary,
+        precomputation, scorer, min_gap_size, max_rule_span, max_nonterminals,
+        max_rule_symbols, max_samples, use_baeza_yates,
+        require_tight_phrases)) {}
+
+GrammarExtractor::GrammarExtractor(
+    shared_ptr<Vocabulary> vocabulary,
+    shared_ptr<HieroCachingRuleFactory> rule_factory) :
+    vocabulary(vocabulary),
+    rule_factory(rule_factory) {}
 
 Grammar GrammarExtractor::GetGrammar(const string& sentence) {
-  vector<string> words = Tokenize(sentence);
+  vector<string> words = TokenizeSentence(sentence);
   vector<int> word_ids = AnnotateWords(words);
-  return rule_factory.GetGrammar(word_ids);
+  return rule_factory->GetGrammar(word_ids);
+}
+
+vector<string> GrammarExtractor::TokenizeSentence(const string& sentence) {
+  vector<string> result;
+  result.push_back("<s>");
+
+  istringstream buffer(sentence);
+  copy(istream_iterator<string>(buffer),
+       istream_iterator<string>(),
+       back_inserter(result));
+
+  result.push_back("</s>");
+  return result;
 }
 
 vector<int> GrammarExtractor::AnnotateWords(const vector<string>& words) {