summary | refs | log | tree | commit | diff
path: root/extractor/sampler.cc
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2013-11-13 18:00:10 +0100
committer: Patrick Simianer <p@simianer.de> 2013-11-13 18:00:10 +0100
commit: 2d2d5eced93d58bc77894d8c328195cd9950b96d (patch)
tree: 41892c0943a0049611ea6035e181cb9cbd1c7d19 /extractor/sampler.cc
parent: 2d025c839e474045d81b7490adc8842ad427c4e1 (diff)
unit tests for extractor loo sampling
Diffstat (limited to 'extractor/sampler.cc')
-rw-r--r-- extractor/sampler.cc 18
1 files changed, 9 insertions, 9 deletions
diff --git a/extractor/sampler.cc b/extractor/sampler.cc
index cb470962..d332dd90 100644
--- a/extractor/sampler.cc
+++ b/extractor/sampler.cc
@@ -19,25 +19,25 @@ PhraseLocation Sampler::Sample(const PhraseLocation& location, unordered_set<int
// Sample suffix array range.
num_subpatterns = 1;
int low = location.sa_low, high = location.sa_high;
- double step = Round(max(1.0, (double) (high - low) / max_samples));
- int i = low, last = i;
+ double step = max(1.0, (double) (high - low) / max_samples);
+ double i = low, last = i;
bool found;
while (sample.size() < max_samples && i < high) {
- int x = suffix_array->GetSuffix(i);
+ int x = suffix_array->GetSuffix(Round(i));
int id = source_data_array->GetSentenceId(x);
if (find(blacklisted_sentence_ids.begin(), blacklisted_sentence_ids.end(), id) != blacklisted_sentence_ids.end()) {
found = false;
- int backoff_step = 1;
+ double backoff_step = 1;
while (true) {
if ((double)backoff_step >= step) break;
- int j = i - backoff_step;
- x = suffix_array->GetSuffix(j);
+ double j = i - backoff_step;
+ x = suffix_array->GetSuffix(Round(j));
id = source_data_array->GetSentenceId(x);
- if (j > last && find(blacklisted_sentence_ids.begin(), blacklisted_sentence_ids.end(), id) == blacklisted_sentence_ids.end()) {
+ if (x >= 0 && j > last && find(blacklisted_sentence_ids.begin(), blacklisted_sentence_ids.end(), id) == blacklisted_sentence_ids.end()) {
found = true; last = i; break;
}
- int k = i + backoff_step;
- x = suffix_array->GetSuffix(k);
+ double k = i + backoff_step;
+ x = suffix_array->GetSuffix(Round(k));
id = source_data_array->GetSentenceId(x);
if (k < min(i+step, (double)high) && find(blacklisted_sentence_ids.begin(), blacklisted_sentence_ids.end(), id) == blacklisted_sentence_ids.end()) {
found = true; last = k; break;