diff options
Diffstat (limited to 'extractor/sampler.cc')
-rw-r--r-- | extractor/sampler.cc | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/extractor/sampler.cc b/extractor/sampler.cc index 963afa7a..fc386ed1 100644 --- a/extractor/sampler.cc +++ b/extractor/sampler.cc @@ -12,7 +12,9 @@ Sampler::Sampler() {} Sampler::~Sampler() {} -PhraseLocation Sampler::Sample(const PhraseLocation& location, const unordered_set<int>& blacklisted_sentence_ids, const shared_ptr<DataArray> source_data_array) const { +PhraseLocation Sampler::Sample( + const PhraseLocation& location, + const unordered_set<int>& blacklisted_sentence_ids) const { vector<int> sample; int num_subpatterns; if (location.matchings == NULL) { @@ -22,10 +24,11 @@ PhraseLocation Sampler::Sample(const PhraseLocation& location, const unordered_s double step = max(1.0, (double) (high - low) / max_samples); double i = low, last = i; bool found; + shared_ptr<DataArray> source_data_array = suffix_array->GetData(); while (sample.size() < max_samples && i < high) { int x = suffix_array->GetSuffix(Round(i)); int id = source_data_array->GetSentenceId(x); - if (find(blacklisted_sentence_ids.begin(), blacklisted_sentence_ids.end(), id) != blacklisted_sentence_ids.end()) { + if (blacklisted_sentence_ids.count(id)) { found = false; double backoff_step = 1; while (true) { @@ -33,13 +36,14 @@ PhraseLocation Sampler::Sample(const PhraseLocation& location, const unordered_s double j = i - backoff_step; x = suffix_array->GetSuffix(Round(j)); id = source_data_array->GetSentenceId(x); - if (x >= 0 && j > last && find(blacklisted_sentence_ids.begin(), blacklisted_sentence_ids.end(), id) == blacklisted_sentence_ids.end()) { + if (x >= 0 && j > last && !blacklisted_sentence_ids.count(id)) { found = true; last = i; break; } double k = i + backoff_step; x = suffix_array->GetSuffix(Round(k)); id = source_data_array->GetSentenceId(x); - if (k < min(i+step, (double)high) && find(blacklisted_sentence_ids.begin(), blacklisted_sentence_ids.end(), id) == blacklisted_sentence_ids.end()) { + if (k < min(i+step, (double)high) && + !blacklisted_sentence_ids.count(id)) { found = true; last = k; break; } if (j <= last && k >= high) break; |