summary | refs | log | tree | commit | diff
path: root/extractor/grammar_extractor.cc
diff options
context:
space:
mode:
author Paul Baltescu <pauldb89@gmail.com> 2013-11-23 17:33:47 +0000
committer Paul Baltescu <pauldb89@gmail.com> 2013-11-23 17:33:47 +0000
commit cc6313b23cac25eb05976b6cf64f96faf1ed4163 (patch)
tree 3dc28060ad25b43773e875bea7388ab1cefcd927 /extractor/grammar_extractor.cc
parent 7990c750829af93f0a1e0fc14534582f52ee9e8c (diff)
parent f2fb69b10a897e8beb4e6e6d6cbb4327096235ef (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'extractor/grammar_extractor.cc')
-rw-r--r-- extractor/grammar_extractor.cc 6
1 files changed, 4 insertions, 2 deletions
diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc
index 8050ce7b..487abcaf 100644
--- a/extractor/grammar_extractor.cc
+++ b/extractor/grammar_extractor.cc
@@ -3,11 +3,13 @@
#include <iterator>
#include <sstream>
#include <vector>
+#include <unordered_set>
#include "grammar.h"
#include "rule.h"
#include "rule_factory.h"
#include "vocabulary.h"
+#include "data_array.h"
using namespace std;
@@ -32,10 +34,10 @@ GrammarExtractor::GrammarExtractor(
vocabulary(vocabulary),
rule_factory(rule_factory) {}
-Grammar GrammarExtractor::GetGrammar(const string& sentence) {
+Grammar GrammarExtractor::GetGrammar(const string& sentence, const unordered_set<int>& blacklisted_sentence_ids, const shared_ptr<DataArray> source_data_array) {
vector<string> words = TokenizeSentence(sentence);
vector<int> word_ids = AnnotateWords(words);
- return rule_factory->GetGrammar(word_ids);
+ return rule_factory->GetGrammar(word_ids, blacklisted_sentence_ids, source_data_array);
}
vector<string> GrammarExtractor::TokenizeSentence(const string& sentence) {