diff options
-rw-r--r-- | extractor/fast_intersector.cc | 7 | ||||
-rw-r--r-- | extractor/precomputation.cc | 7 | ||||
-rw-r--r-- | extractor/precomputation.h | 3 | ||||
-rw-r--r-- | extractor/run_extractor.cc | 1 | ||||
-rw-r--r-- | extractor/sampler.cc | 3 |
5 files changed, 9 insertions, 12 deletions
diff --git a/extractor/fast_intersector.cc b/extractor/fast_intersector.cc index cec3d30b..1b8c32b1 100644 --- a/extractor/fast_intersector.cc +++ b/extractor/fast_intersector.cc @@ -35,12 +35,9 @@ vector<int> FastIntersector::ConvertPhrase(const vector<int>& old_phrase) { vector<int> new_phrase; new_phrase.reserve(old_phrase.size()); shared_ptr<DataArray> data_array = suffix_array->GetData(); - int num_nonterminals = 0; for (int word_id: old_phrase) { - // TODO(pauldb): Remove overhead for relabelling the nonterminals here. - if (word_id == Precomputation::NON_TERMINAL) { - ++num_nonterminals; - new_phrase.push_back(vocabulary->GetNonterminalIndex(num_nonterminals)); + if (word_id < 0) { + new_phrase.push_back(word_id); } else { new_phrase.push_back( vocabulary->GetTerminalIndex(data_array->GetWord(word_id))); diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc index 8cc32ffd..e29018c2 100644 --- a/extractor/precomputation.cc +++ b/extractor/precomputation.cc @@ -10,7 +10,8 @@ using namespace std; namespace extractor { -int Precomputation::NON_TERMINAL = -1; +int Precomputation::FIRST_NONTERMINAL = -1; +int Precomputation::SECOND_NONTERMINAL = -2; Precomputation::Precomputation( shared_ptr<SuffixArray> suffix_array, int num_frequent_patterns, @@ -112,13 +113,13 @@ void Precomputation::AddCollocations( && size1 + size2 + 1 <= max_rule_symbols) { vector<int> pattern(data.begin() + start1, data.begin() + start1 + size1); - pattern.push_back(Precomputation::NON_TERMINAL); + pattern.push_back(Precomputation::FIRST_NONTERMINAL); pattern.insert(pattern.end(), data.begin() + start2, data.begin() + start2 + size2); AddStartPositions(collocations[pattern], start1, start2); if (is_super2) { - pattern.push_back(Precomputation::NON_TERMINAL); + pattern.push_back(Precomputation::SECOND_NONTERMINAL); for (size_t k = j + 1; k < matchings.size(); ++k) { int start3, size3, is_super3; tie(start3, size3, is_super3) = matchings[k]; diff --git a/extractor/precomputation.h b/extractor/precomputation.h index dbd99c14..2c1eccf8 100644 --- a/extractor/precomputation.h +++ b/extractor/precomputation.h @@ -34,7 +34,8 @@ class Precomputation { virtual const Index& GetCollocations() const; - static int NON_TERMINAL; + static int FIRST_NONTERMINAL; + static int SECOND_NONTERMINAL; protected: Precomputation(); diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc index c701c8d0..0f91236d 100644 --- a/extractor/run_extractor.cc +++ b/extractor/run_extractor.cc @@ -35,7 +35,6 @@ using namespace extractor; using namespace features; int main(int argc, char** argv) { - // TODO(pauldb): Also take arguments from config file. po::options_description desc("Command line options"); desc.add_options() ("help,h", "Show available options") diff --git a/extractor/sampler.cc b/extractor/sampler.cc index d128913f..f64a408c 100644 --- a/extractor/sampler.cc +++ b/extractor/sampler.cc @@ -38,8 +38,7 @@ PhraseLocation Sampler::Sample(const PhraseLocation& location) const { } int Sampler::Round(double x) const { - // TODO(pauldb): Remove EPS. - return x + 0.5 + 1e-8; + return x + 0.5; } } // namespace extractor |