diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-11-28 01:39:14 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-11-28 01:49:10 +0000 |
commit | 9e742b90007c32e9f0cec8940c73bd50e33b8182 (patch) | |
tree | 4deb3815a4f47bfc8bebf0279622a37515632dee | |
parent | d389d25b78e5c99366f49cdcaf788693f3c01c40 (diff) |
Fixes.
-rw-r--r-- | extractor/backoff_sampler.cc | 26 | ||||
-rw-r--r-- | extractor/matchings_sampler.cc | 5 |
2 files changed, 16 insertions, 15 deletions
diff --git a/extractor/backoff_sampler.cc b/extractor/backoff_sampler.cc index 28b12909..891276c6 100644 --- a/extractor/backoff_sampler.cc +++ b/extractor/backoff_sampler.cc @@ -16,47 +16,47 @@ PhraseLocation BackoffSampler::Sample( const unordered_set<int>& blacklisted_sentence_ids) const { vector<int> samples; int low = GetRangeLow(location), high = GetRangeHigh(location); - int last_position = low - 1; + int last = low - 1; double step = max(1.0, (double) (high - low) / max_samples); for (double num_samples = 0, i = low; num_samples < max_samples && i < high; ++num_samples, i += step) { - int position = GetPosition(location, round(i)); + int sample = round(i); + int position = GetPosition(location, sample); int sentence_id = source_data_array->GetSentenceId(position); bool found = false; - if (last_position >= position || + if (last >= sample || blacklisted_sentence_ids.count(sentence_id)) { for (double backoff_step = 1; backoff_step < step; ++backoff_step) { double j = i - backoff_step; - if (round(j) >= 0) { - position = GetPosition(location, round(j)); + sample = round(j); + if (sample >= 0) { + position = GetPosition(location, sample); sentence_id = source_data_array->GetSentenceId(position); - if (position > last_position && - !blacklisted_sentence_ids.count(sentence_id)) { + if (sample > last && !blacklisted_sentence_ids.count(sentence_id)) { found = true; - last_position = position; break; } } double k = i + backoff_step; - if (round(k) < high) { - position = GetPosition(location, round(k)); + sample = round(k); + if (sample < high) { + position = GetPosition(location, sample); sentence_id = source_data_array->GetSentenceId(position); if (!blacklisted_sentence_ids.count(sentence_id)) { found = true; - last_position = position; break; } } } } else { found = true; - last_position = position; } if (found) { - AppendMatching(samples, position, location); + last = sample; + AppendMatching(samples, sample, location); } } diff --git a/extractor/matchings_sampler.cc b/extractor/matchings_sampler.cc index bb916e49..75a62366 100644 --- a/extractor/matchings_sampler.cc +++ b/extractor/matchings_sampler.cc @@ -30,8 +30,9 @@ int MatchingsSampler::GetPosition(const PhraseLocation& location, void MatchingsSampler::AppendMatching(vector<int>& samples, int index, const PhraseLocation& location) const { - copy(location.matchings->begin() + index, - location.matchings->begin() + index + location.num_subpatterns, + int start = index * location.num_subpatterns; + copy(location.matchings->begin() + start, + location.matchings->begin() + start + location.num_subpatterns, back_inserter(samples)); } |