diff options
author | Chris Dyer <redpony@gmail.com> | 2013-11-13 11:22:24 -0800 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2013-11-13 11:22:24 -0800 |
commit | 9be8d89b5a4065a81b26d8af1f3443d152e7922a (patch) | |
tree | 237090ff519a0419c3ba379ec3a6884f05caa6c2 /extractor/rule_factory.cc | |
parent | 8a24bb77bc2e9fd17a6f6529a2942cde96a6af49 (diff) | |
parent | 4a9449a564e626fe004200b730bfaa44d6152e0f (diff) |
Merge pull request #27 from pks/master
Tidying (soft) syntax features; loo for C++ extractor; updates for dtrain
Diffstat (limited to 'extractor/rule_factory.cc')
-rw-r--r-- | extractor/rule_factory.cc | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/extractor/rule_factory.cc b/extractor/rule_factory.cc index 8c30fb9e..e52019ae 100644 --- a/extractor/rule_factory.cc +++ b/extractor/rule_factory.cc @@ -17,6 +17,7 @@ #include "suffix_array.h" #include "time_util.h" #include "vocabulary.h" +#include "data_array.h" using namespace std; using namespace chrono; @@ -100,7 +101,7 @@ HieroCachingRuleFactory::HieroCachingRuleFactory() {} HieroCachingRuleFactory::~HieroCachingRuleFactory() {} -Grammar HieroCachingRuleFactory::GetGrammar(const vector<int>& word_ids) { +Grammar HieroCachingRuleFactory::GetGrammar(const vector<int>& word_ids, const unordered_set<int> blacklisted_sentence_ids, const shared_ptr<DataArray> source_data_array) { Clock::time_point start_time = Clock::now(); double total_extract_time = 0; double total_intersect_time = 0; @@ -192,7 +193,7 @@ Grammar HieroCachingRuleFactory::GetGrammar(const vector<int>& word_ids) { Clock::time_point extract_start = Clock::now(); if (!state.starts_with_x) { // Extract rules for the sampled set of occurrences. - PhraseLocation sample = sampler->Sample(next_node->matchings); + PhraseLocation sample = sampler->Sample(next_node->matchings, blacklisted_sentence_ids, source_data_array); vector<Rule> new_rules = rule_extractor->ExtractRules(next_phrase, sample); rules.insert(rules.end(), new_rules.begin(), new_rules.end()); |