From 892b0820ce03268bf4c55bed8a8f697009f67f36 Mon Sep 17 00:00:00 2001
From: Paul Baltescu <pauldb89@gmail.com>
Date: Wed, 6 Mar 2013 15:42:29 +0000
Subject: Fixed 3 TODOs.

---
 extractor/fast_intersector.cc | 7 ++-----
 extractor/precomputation.cc   | 7 ++++---
 extractor/precomputation.h    | 3 ++-
 extractor/run_extractor.cc    | 1 -
 extractor/sampler.cc          | 3 +--
 5 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'extractor')

diff --git a/extractor/fast_intersector.cc b/extractor/fast_intersector.cc
index cec3d30b..1b8c32b1 100644
--- a/extractor/fast_intersector.cc
+++ b/extractor/fast_intersector.cc
@@ -35,12 +35,9 @@ vector<int> FastIntersector::ConvertPhrase(const vector<int>& old_phrase) {
   vector<int> new_phrase;
   new_phrase.reserve(old_phrase.size());
   shared_ptr<DataArray> data_array = suffix_array->GetData();
-  int num_nonterminals = 0;
   for (int word_id: old_phrase) {
-    // TODO(pauldb): Remove overhead for relabelling the nonterminals here.
-    if (word_id == Precomputation::NON_TERMINAL) {
-      ++num_nonterminals;
-      new_phrase.push_back(vocabulary->GetNonterminalIndex(num_nonterminals));
+    if (word_id < 0) {
+      new_phrase.push_back(word_id);
     } else {
       new_phrase.push_back(
           vocabulary->GetTerminalIndex(data_array->GetWord(word_id)));
diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc
index 8cc32ffd..e29018c2 100644
--- a/extractor/precomputation.cc
+++ b/extractor/precomputation.cc
@@ -10,7 +10,8 @@ using namespace std;
 
 namespace extractor {
 
-int Precomputation::NON_TERMINAL = -1;
+int Precomputation::FIRST_NONTERMINAL = -1;
+int Precomputation::SECOND_NONTERMINAL = -2;
 
 Precomputation::Precomputation(
     shared_ptr<SuffixArray> suffix_array, int num_frequent_patterns,
@@ -112,13 +113,13 @@ void Precomputation::AddCollocations(
           && size1 + size2 + 1 <= max_rule_symbols) {
         vector<int> pattern(data.begin() + start1,
             data.begin() + start1 + size1);
-        pattern.push_back(Precomputation::NON_TERMINAL);
+        pattern.push_back(Precomputation::FIRST_NONTERMINAL);
         pattern.insert(pattern.end(), data.begin() + start2,
             data.begin() + start2 + size2);
         AddStartPositions(collocations[pattern], start1, start2);
 
         if (is_super2) {
-          pattern.push_back(Precomputation::NON_TERMINAL);
+          pattern.push_back(Precomputation::SECOND_NONTERMINAL);
           for (size_t k = j + 1; k < matchings.size(); ++k) {
             int start3, size3, is_super3;
             tie(start3, size3, is_super3) = matchings[k];
diff --git a/extractor/precomputation.h b/extractor/precomputation.h
index dbd99c14..2c1eccf8 100644
--- a/extractor/precomputation.h
+++ b/extractor/precomputation.h
@@ -34,7 +34,8 @@ class Precomputation {
 
   virtual const Index& GetCollocations() const;
 
-  static int NON_TERMINAL;
+  static int FIRST_NONTERMINAL;
+  static int SECOND_NONTERMINAL;
 
  protected:
   Precomputation();
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index c701c8d0..0f91236d 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -35,7 +35,6 @@ using namespace extractor;
 using namespace features;
 
 int main(int argc, char** argv) {
-  // TODO(pauldb): Also take arguments from config file.
   po::options_description desc("Command line options");
   desc.add_options()
     ("help,h", "Show available options")
diff --git a/extractor/sampler.cc b/extractor/sampler.cc
index d128913f..f64a408c 100644
--- a/extractor/sampler.cc
+++ b/extractor/sampler.cc
@@ -38,8 +38,7 @@ PhraseLocation Sampler::Sample(const PhraseLocation& location) const {
 }
 
 int Sampler::Round(double x) const {
-  // TODO(pauldb): Remove EPS.
-  return x + 0.5 + 1e-8;
+  return x + 0.5;
 }
 
 } // namespace extractor
-- 
cgit v1.2.3