Remove original version of the intersector.

author: Paul Baltescu <pauldb89@gmail.com> 2013-02-22 11:59:17 +0000
committer: Paul Baltescu <pauldb89@gmail.com> 2013-02-22 11:59:17 +0000
commit: d69c289e172562039bcbe987657280332ab6315e (patch)
tree: cc67d97bf802101d981cfacadb1cea80472aa107 /extractor
parent: a21835881984ba90f303c7b1681d295678cb7131 (diff)
32 files changed, 17 insertions, 1855 deletions
diff --git a/extractor/Makefile.am b/extractor/Makefile.am
index df796cea..721df18b 100644
--- a/extractor/Makefile.am
+++ b/extractor/Makefile.am
@@ -1,7 +1,6 @@
 bin_PROGRAMS = compile run_extractor
 
 EXTRA_PROGRAMS = alignment_test \
-    binary_search_merger_test \
     data_array_test \
     fast_intersector_test \
     feature_count_source_target_test \
@@ -12,10 +11,6 @@ EXTRA_PROGRAMS = alignment_test \
     feature_sample_source_count_test \
     feature_target_given_source_coherent_test \
     grammar_extractor_test \
-    intersector_test \
-    linear_merger_test \
-    matching_comparator_test \
-    matching_test \
     matchings_finder_test \
     phrase_test \
     precomputation_test \
@@ -26,12 +21,10 @@ EXTRA_PROGRAMS = alignment_test \
     scorer_test \
     suffix_array_test \
     target_phrase_extractor_test \
-    translation_table_test \
-    veb_test
+    translation_table_test
 
 if HAVE_GTEST
   RUNNABLE_TESTS = alignment_test \
-    binary_search_merger_test \
     data_array_test \
     fast_intersector_test \
     feature_count_source_target_test \
@@ -42,10 +35,6 @@ if HAVE_GTEST
     feature_sample_source_count_test \
     feature_target_given_source_coherent_test \
     grammar_extractor_test \
-    intersector_test \
-    linear_merger_test \
-    matching_comparator_test \
-    matching_test \
     matchings_finder_test \
     phrase_test \
     precomputation_test \
@@ -56,8 +45,7 @@ if HAVE_GTEST
     scorer_test \
     suffix_array_test \
     target_phrase_extractor_test \
-    translation_table_test \
-    veb_test
+    translation_table_test
 endif
 
 noinst_PROGRAMS = $(RUNNABLE_TESTS)
@@ -66,8 +54,6 @@ TESTS = $(RUNNABLE_TESTS)
 
 alignment_test_SOURCES = alignment_test.cc
 alignment_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
-binary_search_merger_test_SOURCES = binary_search_merger_test.cc
-binary_search_merger_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
 data_array_test_SOURCES = data_array_test.cc
 data_array_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
 fast_intersector_test_SOURCES = fast_intersector_test.cc
@@ -88,14 +74,6 @@ feature_target_given_source_coherent_test_SOURCES = features/target_given_source
 feature_target_given_source_coherent_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
 grammar_extractor_test_SOURCES = grammar_extractor_test.cc
 grammar_extractor_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
-intersector_test_SOURCES = intersector_test.cc
-intersector_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
-linear_merger_test_SOURCES = linear_merger_test.cc
-linear_merger_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
-matching_comparator_test_SOURCES = matching_comparator_test.cc
-matching_comparator_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
-matching_test_SOURCES = matching_test.cc
-matching_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
 matchings_finder_test_SOURCES = matchings_finder_test.cc
 matchings_finder_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
 phrase_test_SOURCES = phrase_test.cc
@@ -118,8 +96,6 @@ target_phrase_extractor_test_SOURCES = target_phrase_extractor_test.cc
 target_phrase_extractor_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
 translation_table_test_SOURCES = translation_table_test.cc
 translation_table_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) $(GMOCK_LDFLAGS) $(GMOCK_LIBS) libextractor.a
-veb_test_SOURCES = veb_test.cc
-veb_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libextractor.a
 
 noinst_LIBRARIES = libextractor.a libcompile.a
 
@@ -139,7 +115,6 @@ libcompile_a_SOURCES = \
 
 libextractor_a_SOURCES = \
   alignment.cc \
-  binary_search_merger.cc \
   data_array.cc \
   fast_intersector.cc \
   features/count_source_target.cc \
@@ -152,11 +127,7 @@ libextractor_a_SOURCES = \
   features/target_given_source_coherent.cc \
   grammar.cc \
   grammar_extractor.cc \
-  matching.cc \
-  matching_comparator.cc \
   matchings_finder.cc \
-  intersector.cc \
-  linear_merger.cc \
   matchings_trie.cc \
   phrase.cc \
   phrase_builder.cc \
@@ -172,9 +143,6 @@ libextractor_a_SOURCES = \
   target_phrase_extractor.cc \
   time_util.cc \
   translation_table.cc \
-  veb.cc \
-  veb_bitset.cc \
-  veb_tree.cc \
   vocabulary.cc
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -std=c++0x $(GTEST_CPPFLAGS) $(GMOCK_CPPFLAGS)
diff --git a/extractor/binary_search_merger.cc b/extractor/binary_search_merger.cc
deleted file mode 100644
index c1b86a77..00000000
--- a/extractor/binary_search_merger.cc
+++ /dev/null
@@ -1,251 +0,0 @@
-#include "binary_search_merger.h"
-
-#include "data_array.h"
-#include "linear_merger.h"
-#include "matching.h"
-#include "matching_comparator.h"
-#include "phrase.h"
-#include "vocabulary.h"
-
-double BinarySearchMerger::BAEZA_YATES_FACTOR = 1.0;
-
-BinarySearchMerger::BinarySearchMerger(
-    shared_ptr<Vocabulary> vocabulary,
-    shared_ptr<LinearMerger> linear_merger,
-    shared_ptr<DataArray> data_array,
-    shared_ptr<MatchingComparator> comparator,
-    bool force_binary_search_merge) :
-    vocabulary(vocabulary), linear_merger(linear_merger),
-    data_array(data_array), comparator(comparator),
-    force_binary_search_merge(force_binary_search_merge) {}
-
-BinarySearchMerger::BinarySearchMerger() {}
-
-BinarySearchMerger::~BinarySearchMerger() {}
-
-void BinarySearchMerger::Merge(
-    vector<int>& locations, const Phrase& phrase, const Phrase& suffix,
-    const vector<int>::iterator& prefix_start,
-    const vector<int>::iterator& prefix_end,
-    const vector<int>::iterator& suffix_start,
-    const vector<int>::iterator& suffix_end,
-    int prefix_subpatterns, int suffix_subpatterns) const {
-  if (IsIntersectionVoid(prefix_start, prefix_end, suffix_start, suffix_end,
-      prefix_subpatterns, suffix_subpatterns, suffix)) {
-    return;
-  }
-
-  int prefix_set_size = prefix_end - prefix_start;
-  int suffix_set_size = suffix_end - suffix_start;
-  if (ShouldUseLinearMerge(prefix_set_size, suffix_set_size)) {
-    linear_merger->Merge(locations, phrase, suffix, prefix_start, prefix_end,
-          suffix_start, suffix_end, prefix_subpatterns, suffix_subpatterns);
-    return;
-  }
-
-  vector<int>::iterator low, high, prefix_low, prefix_high, suffix_mid;
-  if (prefix_set_size > suffix_set_size) {
-    // Binary search on the prefix set.
-    suffix_mid = GetMiddle(suffix_start, suffix_end, suffix_subpatterns);
-    low = prefix_start, high = prefix_end;
-    while (low < high) {
-      vector<int>::iterator prefix_mid =
-          GetMiddle(low, high, prefix_subpatterns);
-
-      GetComparableMatchings(prefix_start, prefix_end, prefix_mid,
-          prefix_subpatterns, prefix_low, prefix_high);
-      int comparison = CompareMatchingsSet(prefix_low, prefix_high, suffix_mid,
-          prefix_subpatterns, suffix_subpatterns, suffix);
-      if (comparison == 0) {
-        break;
-      } else if (comparison < 0) {
-        low = prefix_mid + prefix_subpatterns;
-      } else {
-        high = prefix_mid;
-      }
-    }
-  } else {
-    // Binary search on the suffix set.
-    vector<int>::iterator prefix_mid =
-        GetMiddle(prefix_start, prefix_end, prefix_subpatterns);
-
-    GetComparableMatchings(prefix_start, prefix_end, prefix_mid,
-        prefix_subpatterns, prefix_low, prefix_high);
-    low = suffix_start, high = suffix_end;
-    while (low < high) {
-      suffix_mid = GetMiddle(low, high, suffix_subpatterns);
-
-      int comparison = CompareMatchingsSet(prefix_low, prefix_high, suffix_mid,
-          prefix_subpatterns, suffix_subpatterns, suffix);
-      if (comparison == 0) {
-        break;
-      } else if (comparison > 0) {
-        low = suffix_mid + suffix_subpatterns;
-      } else {
-        high = suffix_mid;
-      }
-    }
-  }
-
-  vector<int> result;
-  int last_chunk_len = suffix.GetChunkLen(suffix.Arity());
-  bool offset = !vocabulary->IsTerminal(suffix.GetSymbol(0));
-  vector<int>::iterator suffix_low, suffix_high;
-  if (low < high) {
-    // We found a group of prefixes with the same starting position that give
-    // different results when compared to the found suffix.
-    // Find all matching suffixes for the previously found set of prefixes.
-    suffix_low = suffix_mid;
-    suffix_high = suffix_mid + suffix_subpatterns;
-    for (auto i = prefix_low; i != prefix_high; i += prefix_subpatterns) {
-      Matching left(i, prefix_subpatterns, data_array->GetSentenceId(*i));
-      while (suffix_low != suffix_start) {
-        Matching right(suffix_low - suffix_subpatterns, suffix_subpatterns,
-            data_array->GetSentenceId(*(suffix_low - suffix_subpatterns)));
-        if (comparator->Compare(left, right, last_chunk_len, offset) <= 0) {
-          suffix_low -= suffix_subpatterns;
-        } else {
-          break;
-        }
-      }
-
-      for (auto j = suffix_low; j != suffix_end; j += suffix_subpatterns) {
-        Matching right(j, suffix_subpatterns, data_array->GetSentenceId(*j));
-        int comparison = comparator->Compare(left, right, last_chunk_len,
-                                             offset);
-        if (comparison == 0) {
-          vector<int> merged = left.Merge(right, phrase.Arity() + 1);
-          result.insert(result.end(), merged.begin(), merged.end());
-        } else if (comparison < 0) {
-          break;
-        }
-        suffix_high = max(suffix_high, j + suffix_subpatterns);
-      }
-    }
-
-    swap(suffix_low, suffix_high);
-  } else if (prefix_set_size > suffix_set_size) {
-    // We did the binary search on the prefix set.
-    suffix_low = suffix_mid;
-    suffix_high = suffix_mid + suffix_subpatterns;
-    if (CompareMatchingsSet(prefix_low, prefix_high, suffix_mid,
-        prefix_subpatterns, suffix_subpatterns, suffix) < 0) {
-      prefix_low = prefix_high;
-    } else {
-      prefix_high = prefix_low;
-    }
-  } else {
-    // We did the binary search on the suffix set.
-    if (CompareMatchingsSet(prefix_low, prefix_high, suffix_mid,
-        prefix_subpatterns, suffix_subpatterns, suffix) < 0) {
-      suffix_low = suffix_mid;
-      suffix_high = suffix_mid;
-    } else {
-      suffix_low = suffix_mid + suffix_subpatterns;
-      suffix_high = suffix_mid + suffix_subpatterns;
-    }
-  }
-
-  Merge(locations, phrase, suffix, prefix_start, prefix_low, suffix_start,
-        suffix_low, prefix_subpatterns, suffix_subpatterns);
-  locations.insert(locations.end(), result.begin(), result.end());
-  Merge(locations, phrase, suffix, prefix_high, prefix_end, suffix_high,
-        suffix_end, prefix_subpatterns, suffix_subpatterns);
-}
-
-bool BinarySearchMerger::IsIntersectionVoid(
-    vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
-    vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
-    int prefix_subpatterns, int suffix_subpatterns,
-    const Phrase& suffix) const {
-  // Is any of the sets empty?
-  if (prefix_start >= prefix_end || suffix_start >= suffix_end) {
-    return true;
-  }
-
-  int last_chunk_len = suffix.GetChunkLen(suffix.Arity());
-  bool offset = !vocabulary->IsTerminal(suffix.GetSymbol(0));
-  // Is the first value from the first set larger than the last value in the
-  // second set?
-  Matching left(prefix_start, prefix_subpatterns,
-      data_array->GetSentenceId(*prefix_start));
-  Matching right(suffix_end - suffix_subpatterns, suffix_subpatterns,
-      data_array->GetSentenceId(*(suffix_end - suffix_subpatterns)));
-  if (comparator->Compare(left, right, last_chunk_len, offset) > 0) {
-    return true;
-  }
-
-  // Is the last value from the first set smaller than the first value in the
-  // second set?
-  left = Matching(prefix_end - prefix_subpatterns, prefix_subpatterns,
-      data_array->GetSentenceId(*(prefix_end - prefix_subpatterns)));
-  right = Matching(suffix_start, suffix_subpatterns,
-      data_array->GetSentenceId(*suffix_start));
-  if (comparator->Compare(left, right, last_chunk_len, offset) < 0) {
-    return true;
-  }
-
-  return false;
-}
-
-bool BinarySearchMerger::ShouldUseLinearMerge(
-    int prefix_set_size, int suffix_set_size) const {
-  if (force_binary_search_merge) {
-    return false;
-  }
-
-  int min_size = min(prefix_set_size, suffix_set_size);
-  int max_size = max(prefix_set_size, suffix_set_size);
-
-  return BAEZA_YATES_FACTOR * min_size * log2(max_size) > max_size;
-}
-
-vector<int>::iterator BinarySearchMerger::GetMiddle(
-    vector<int>::iterator low, vector<int>::iterator high,
-    int num_subpatterns) const {
-  return low + (((high - low) / num_subpatterns) / 2) * num_subpatterns;
-}
-
-void BinarySearchMerger::GetComparableMatchings(
-    const vector<int>::iterator& prefix_start,
-    const vector<int>::iterator& prefix_end,
-    const vector<int>::iterator& prefix_mid,
-    int num_subpatterns,
-    vector<int>::iterator& prefix_low,
-    vector<int>::iterator& prefix_high) const {
-  prefix_low = prefix_mid;
-  while (prefix_low != prefix_start
-      && *prefix_mid == *(prefix_low - num_subpatterns)) {
-    prefix_low -= num_subpatterns;
-  }
-  prefix_high = prefix_mid + num_subpatterns;
-  while (prefix_high != prefix_end
-      && *prefix_mid == *prefix_high) {
-    prefix_high += num_subpatterns;
-  }
-}
-
-int BinarySearchMerger::CompareMatchingsSet(
-    const vector<int>::iterator& prefix_start,
-    const vector<int>::iterator& prefix_end,
-    const vector<int>::iterator& suffix_mid,
-    int prefix_subpatterns,
-    int suffix_subpatterns,
-    const Phrase& suffix) const {
-  int result = 0;
-  int last_chunk_len = suffix.GetChunkLen(suffix.Arity());
-  bool offset = !vocabulary->IsTerminal(suffix.GetSymbol(0));
-
-  Matching right(suffix_mid, suffix_subpatterns,
-      data_array->GetSentenceId(*suffix_mid));
-  for (auto i = prefix_start; i != prefix_end; i += prefix_subpatterns) {
-    Matching left(i, prefix_subpatterns, data_array->GetSentenceId(*i));
-    int comparison = comparator->Compare(left, right, last_chunk_len, offset);
-    if (i == prefix_start) {
-      result = comparison;
-    } else if (comparison != result) {
-      return 0;
-    }
-  }
-  return result;
-}
diff --git a/extractor/binary_search_merger.h b/extractor/binary_search_merger.h
deleted file mode 100644
index c887e012..00000000
--- a/extractor/binary_search_merger.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef _BINARY_SEARCH_MERGER_H_
-#define _BINARY_SEARCH_MERGER_H_
-
-#include <memory>
-#include <vector>
-
-using namespace std;
-
-class DataArray;
-class LinearMerger;
-class MatchingComparator;
-class Phrase;
-class Vocabulary;
-
-class BinarySearchMerger {
- public:
-  BinarySearchMerger(shared_ptr<Vocabulary> vocabulary,
-                     shared_ptr<LinearMerger> linear_merger,
-                     shared_ptr<DataArray> data_array,
-                     shared_ptr<MatchingComparator> comparator,
-                     bool force_binary_search_merge = false);
-
-  virtual ~BinarySearchMerger();
-
-  virtual void Merge(
-      vector<int>& locations, const Phrase& phrase, const Phrase& suffix,
-      const vector<int>::iterator& prefix_start,
-      const vector<int>::iterator& prefix_end,
-      const vector<int>::iterator& suffix_start,
-      const vector<int>::iterator& suffix_end,
-      int prefix_subpatterns, int suffix_subpatterns) const;
-
-  static double BAEZA_YATES_FACTOR;
-
- protected:
-  BinarySearchMerger();
-
- private:
-  bool IsIntersectionVoid(
-      vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
-      vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
-      int prefix_subpatterns, int suffix_subpatterns,
-      const Phrase& suffix) const;
-
-  bool ShouldUseLinearMerge(int prefix_set_size, int suffix_set_size) const;
-
-  vector<int>::iterator GetMiddle(vector<int>::iterator low,
-                                  vector<int>::iterator high,
-                                  int num_subpatterns) const;
-
-  void GetComparableMatchings(
-      const vector<int>::iterator& prefix_start,
-      const vector<int>::iterator& prefix_end,
-      const vector<int>::iterator& prefix_mid,
-      int num_subpatterns,
-      vector<int>::iterator& prefix_low,
-      vector<int>::iterator& prefix_high) const;
-
-  int CompareMatchingsSet(
-      const vector<int>::iterator& prefix_low,
-      const vector<int>::iterator& prefix_high,
-      const vector<int>::iterator& suffix_mid,
-      int prefix_subpatterns,
-      int suffix_subpatterns,
-      const Phrase& suffix) const;
-
-  shared_ptr<Vocabulary> vocabulary;
-  shared_ptr<LinearMerger> linear_merger;
-  shared_ptr<DataArray> data_array;
-  shared_ptr<MatchingComparator> comparator;
-  // Should be true only for testing.
-  bool force_binary_search_merge;
-};
-
-#endif
diff --git a/extractor/binary_search_merger_test.cc b/extractor/binary_search_merger_test.cc
deleted file mode 100644
index b1baa62f..00000000
--- a/extractor/binary_search_merger_test.cc
+++ /dev/null
@@ -1,157 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <memory>
-
-#include "binary_search_merger.h"
-#include "matching_comparator.h"
-#include "mocks/mock_data_array.h"
-#include "mocks/mock_vocabulary.h"
-#include "mocks/mock_linear_merger.h"
-#include "phrase.h"
-#include "phrase_location.h"
-#include "phrase_builder.h"
-
-using namespace std;
-using namespace ::testing;
-
-namespace {
-
-class BinarySearchMergerTest : public Test {
- protected:
-  virtual void SetUp() {
-    shared_ptr<MockVocabulary> vocabulary = make_shared<MockVocabulary>();
-    EXPECT_CALL(*vocabulary, GetTerminalValue(_))
-        .WillRepeatedly(Return("word"));
-
-    shared_ptr<MockDataArray> data_array = make_shared<MockDataArray>();
-    EXPECT_CALL(*data_array, GetSentenceId(_))
-        .WillRepeatedly(Return(1));
-
-    shared_ptr<MatchingComparator> comparator =
-        make_shared<MatchingComparator>(1, 20);
-
-    phrase_builder = make_shared<PhraseBuilder>(vocabulary);
-
-    // We are going to force the binary_search_merger to do all the work, so we
-    // need to check that the linear_merger never gets called.
-    shared_ptr<MockLinearMerger> linear_merger =
-        make_shared<MockLinearMerger>();
-    EXPECT_CALL(*linear_merger, Merge(_, _, _, _, _, _, _, _, _)).Times(0);
-
-    binary_search_merger = make_shared<BinarySearchMerger>(
-        vocabulary, linear_merger, data_array, comparator, true);
-  }
-
-  shared_ptr<BinarySearchMerger> binary_search_merger;
-  shared_ptr<PhraseBuilder> phrase_builder;
-};
-
-TEST_F(BinarySearchMergerTest, aXbTest) {
-  vector<int> locations;
-  // Encoding for him X it (see Adam's dissertation).
-  vector<int> symbols{1, -1, 2};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{-1, 2};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs{2, 6, 10, 15};
-  vector<int> suffix_locs{0, 4, 8, 13};
-
-  binary_search_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 1, 1);
-
-  vector<int> expected_locations{2, 4, 2, 8, 2, 13, 6, 8, 6, 13, 10, 13};
-  EXPECT_EQ(expected_locations, locations);
-}
-
-TEST_F(BinarySearchMergerTest, aXbXcTest) {
-  vector<int> locations;
-  // Encoding for it X him X it (see Adam's dissertation).
-  vector<int> symbols{1, -1, 2, -2, 1};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{-1, 2, -2, 1};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs{0, 2, 0, 6, 0, 10, 4, 6, 4, 10, 4, 15, 8, 10, 8, 15,
-                          13, 15};
-  vector<int> suffix_locs{2, 4, 2, 8, 2, 13, 6, 8, 6, 13, 10, 13};
-
-  binary_search_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 2, 2);
-
-  vector<int> expected_locs{0, 2, 4, 0, 2, 8, 0, 2, 13, 0, 6, 8, 0, 6, 13, 0,
-                            10, 13, 4, 6, 8, 4, 6, 13, 4, 10, 13, 8, 10, 13};
-  EXPECT_EQ(expected_locs, locations);
-}
-
-TEST_F(BinarySearchMergerTest, abXcXdTest) {
-  // Sentence: Anna has many many nuts and sour apples and juicy apples.
-  // Phrase: Anna has X and X apples.
-  vector<int> locations;
-  vector<int> symbols{1, 2, -1, 3, -2, 4};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{2, -1, 3, -2, 4};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs{1, 6, 1, 9};
-  vector<int> suffix_locs{2, 6, 8, 2, 6, 11, 2, 9, 11};
-
-  binary_search_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 2, 3);
-
-  vector<int> expected_locs{1, 6, 8, 1, 6, 11, 1, 9, 11};
-  EXPECT_EQ(expected_locs, locations);
-}
-
-TEST_F(BinarySearchMergerTest, LargeTest) {
-  vector<int> locations;
-  vector<int> symbols{1, -1, 2};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{-1, 2};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs;
-  for (int i = 0; i < 100; ++i) {
-    prefix_locs.push_back(i * 20 + 1);
-  }
-  vector<int> suffix_locs;
-  for (int i = 0; i < 100; ++i) {
-    suffix_locs.push_back(i * 20 + 5);
-    suffix_locs.push_back(i * 20 + 13);
-  }
-
-  binary_search_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 1, 1);
-
-  EXPECT_EQ(400, locations.size());
-  for (int i = 0; i < 100; ++i) {
-    EXPECT_EQ(i * 20 + 1, locations[4 * i]);
-    EXPECT_EQ(i * 20 + 5, locations[4 * i + 1]);
-    EXPECT_EQ(i * 20 + 1, locations[4 * i + 2]);
-    EXPECT_EQ(i * 20 + 13, locations[4 * i + 3]);
-  }
-}
-
-TEST_F(BinarySearchMergerTest, EmptyResultTest) {
-  vector<int> locations;
-  vector<int> symbols{1, -1, 2};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{-1, 2};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs;
-  for (int i = 0; i < 100; ++i) {
-    prefix_locs.push_back(i * 200 + 1);
-  }
-  vector<int> suffix_locs;
-  for (int i = 0; i < 100; ++i) {
-    suffix_locs.push_back(i * 200 + 101);
-  }
-
-  binary_search_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 1, 1);
-
-  EXPECT_EQ(0, locations.size());
-}
-
-}  // namespace
diff --git a/extractor/grammar_extractor.cc b/extractor/grammar_extractor.cc
index a03e805f..b8f6f0c7 100644
--- a/extractor/grammar_extractor.cc
+++ b/extractor/grammar_extractor.cc
@@ -16,13 +16,12 @@ GrammarExtractor::GrammarExtractor(
     shared_ptr<Alignment> alignment, shared_ptr<Precomputation> precomputation,
     shared_ptr<Scorer> scorer, int min_gap_size, int max_rule_span,
     int max_nonterminals, int max_rule_symbols, int max_samples,
-    bool use_fast_intersect, bool use_baeza_yates, bool require_tight_phrases) :
+    bool require_tight_phrases) :
     vocabulary(make_shared<Vocabulary>()),
     rule_factory(make_shared<HieroCachingRuleFactory>(
         source_suffix_array, target_data_array, alignment, vocabulary,
         precomputation, scorer, min_gap_size, max_rule_span, max_nonterminals,
-        max_rule_symbols, max_samples, use_fast_intersect, use_baeza_yates,
-        require_tight_phrases)) {}
+        max_rule_symbols, max_samples, require_tight_phrases)) {}
 
 GrammarExtractor::GrammarExtractor(
     shared_ptr<Vocabulary> vocabulary,
diff --git a/extractor/grammar_extractor.h b/extractor/grammar_extractor.h
index a8f2090d..f50a8d14 100644
--- a/extractor/grammar_extractor.h
+++ b/extractor/grammar_extractor.h
@@ -29,8 +29,6 @@ class GrammarExtractor {
       int max_nonterminals,
       int max_rule_symbols,
       int max_samples,
-      bool use_fast_intersect,
-      bool use_baeza_yates,
       bool require_tight_phrases);
 
   // For testing only.
diff --git a/extractor/intersector.cc b/extractor/intersector.cc
deleted file mode 100644
index 39a7648d..00000000
--- a/extractor/intersector.cc
+++ /dev/null
@@ -1,154 +0,0 @@
-#include "intersector.h"
-
-#include "data_array.h"
-#include "matching_comparator.h"
-#include "phrase.h"
-#include "phrase_location.h"
-#include "precomputation.h"
-#include "suffix_array.h"
-#include "veb.h"
-#include "vocabulary.h"
-
-Intersector::Intersector(shared_ptr<Vocabulary> vocabulary,
-                         shared_ptr<Precomputation> precomputation,
-                         shared_ptr<SuffixArray> suffix_array,
-                         shared_ptr<MatchingComparator> comparator,
-                         bool use_baeza_yates) :
-    vocabulary(vocabulary),
-    suffix_array(suffix_array),
-    use_baeza_yates(use_baeza_yates) {
-  shared_ptr<DataArray> data_array = suffix_array->GetData();
-  linear_merger = make_shared<LinearMerger>(vocabulary, data_array, comparator);
-  binary_search_merger = make_shared<BinarySearchMerger>(
-      vocabulary, linear_merger, data_array, comparator);
-  ConvertIndexes(precomputation, data_array);
-}
-
-Intersector::Intersector(shared_ptr<Vocabulary> vocabulary,
-                         shared_ptr<Precomputation> precomputation,
-                         shared_ptr<SuffixArray> suffix_array,
-                         shared_ptr<LinearMerger> linear_merger,
-                         shared_ptr<BinarySearchMerger> binary_search_merger,
-                         bool use_baeza_yates) :
-    vocabulary(vocabulary),
-    suffix_array(suffix_array),
-    linear_merger(linear_merger),
-    binary_search_merger(binary_search_merger),
-    use_baeza_yates(use_baeza_yates) {
-  ConvertIndexes(precomputation, suffix_array->GetData());
-}
-
-Intersector::Intersector() {}
-
-Intersector::~Intersector() {}
-
-void Intersector::ConvertIndexes(shared_ptr<Precomputation> precomputation,
-                                 shared_ptr<DataArray> data_array) {
-  const Index& precomputed_index = precomputation->GetInvertedIndex();
-  for (pair<vector<int>, vector<int> > entry: precomputed_index) {
-    vector<int> phrase = ConvertPhrase(entry.first, data_array);
-    inverted_index[phrase] = entry.second;
-
-    phrase.push_back(vocabulary->GetNonterminalIndex(1));
-    inverted_index[phrase] = entry.second;
-    phrase.pop_back();
-    phrase.insert(phrase.begin(), vocabulary->GetNonterminalIndex(1));
-    inverted_index[phrase] = entry.second;
-  }
-
-  const Index& precomputed_collocations = precomputation->GetCollocations();
-  for (pair<vector<int>, vector<int> > entry: precomputed_collocations) {
-    vector<int> phrase = ConvertPhrase(entry.first, data_array);
-    collocations[phrase] = entry.second;
-  }
-}
-
-vector<int> Intersector::ConvertPhrase(const vector<int>& old_phrase,
-                                       shared_ptr<DataArray> data_array) {
-  vector<int> new_phrase;
-  new_phrase.reserve(old_phrase.size());
-
-  int arity = 0;
-  for (int word_id: old_phrase) {
-    if (word_id == Precomputation::NON_TERMINAL) {
-      ++arity;
-      new_phrase.push_back(vocabulary->GetNonterminalIndex(arity));
-    } else {
-      new_phrase.push_back(
-          vocabulary->GetTerminalIndex(data_array->GetWord(word_id)));
-    }
-  }
-
-  return new_phrase;
-}
-
-PhraseLocation Intersector::Intersect(
-    const Phrase& prefix, PhraseLocation& prefix_location,
-    const Phrase& suffix, PhraseLocation& suffix_location,
-    const Phrase& phrase) {
-  vector<int> symbols = phrase.Get();
-
-  // We should never attempt to do an intersect query for a pattern starting or
-  // ending with a non terminal. The RuleFactory should handle these cases,
-  // initializing the matchings list with the one for the pattern without the
-  // starting or ending terminal.
-  assert(vocabulary->IsTerminal(symbols.front())
-      && vocabulary->IsTerminal(symbols.back()));
-
-  if (collocations.count(symbols)) {
-    return PhraseLocation(collocations[symbols], phrase.Arity() + 1);
-  }
-
-  vector<int> locations;
-  ExtendPhraseLocation(prefix, prefix_location);
-  ExtendPhraseLocation(suffix, suffix_location);
-  shared_ptr<vector<int> > prefix_matchings = prefix_location.matchings;
-  shared_ptr<vector<int> > suffix_matchings = suffix_location.matchings;
-  int prefix_subpatterns = prefix_location.num_subpatterns;
-  int suffix_subpatterns = suffix_location.num_subpatterns;
-  if (use_baeza_yates) {
-    binary_search_merger->Merge(locations, phrase, suffix,
-        prefix_matchings->begin(), prefix_matchings->end(),
-        suffix_matchings->begin(), suffix_matchings->end(),
-        prefix_subpatterns, suffix_subpatterns);
-  } else {
-    linear_merger->Merge(locations, phrase, suffix, prefix_matchings->begin(),
-        prefix_matchings->end(), suffix_matchings->begin(),
-        suffix_matchings->end(), prefix_subpatterns, suffix_subpatterns);
-  }
-  return PhraseLocation(locations, phrase.Arity() + 1);
-}
-
-void Intersector::ExtendPhraseLocation(
-    const Phrase& phrase, PhraseLocation& phrase_location) {
-  int low = phrase_location.sa_low, high = phrase_location.sa_high;
-  if (phrase_location.matchings != NULL) {
-    return;
-  }
-
-
-  phrase_location.num_subpatterns = 1;
-  phrase_location.sa_low = phrase_location.sa_high = 0;
-
-  vector<int> symbols = phrase.Get();
-  if (inverted_index.count(symbols)) {
-    phrase_location.matchings =
-        make_shared<vector<int> >(inverted_index[symbols]);
-    return;
-  }
-
-  vector<int> matchings;
-  matchings.reserve(high - low + 1);
-  shared_ptr<VEB> veb = VEB::Create(suffix_array->GetSize());
-  for (int i = low; i < high; ++i) {
-    veb->Insert(suffix_array->GetSuffix(i));
-  }
-
-  int value = veb->GetMinimum();
-  while (value != -1) {
-    matchings.push_back(value);
-    value = veb->GetSuccessor(value);
-  }
-
-  phrase_location.matchings = make_shared<vector<int> >(matchings);
-}
diff --git a/extractor/intersector.h b/extractor/intersector.h
deleted file mode 100644
index 8b159f17..00000000
--- a/extractor/intersector.h
+++ /dev/null
@@ -1,79 +0,0 @@
-#ifndef _INTERSECTOR_H_
-#define _INTERSECTOR_H_
-
-#include <memory>
-#include <unordered_map>
-#include <vector>
-
-#include <boost/functional/hash.hpp>
-
-#include "binary_search_merger.h"
-#include "linear_merger.h"
-
-using namespace std;
-
-typedef boost::hash<vector<int> > VectorHash;
-typedef unordered_map<vector<int>, vector<int>, VectorHash> Index;
-
-class DataArray;
-class MatchingComparator;
-class Phrase;
-class PhraseLocation;
-class Precomputation;
-class SuffixArray;
-class Vocabulary;
-
-class Intersector {
- public:
-  Intersector(
-      shared_ptr<Vocabulary> vocabulary,
-      shared_ptr<Precomputation> precomputation,
-      shared_ptr<SuffixArray> source_suffix_array,
-      shared_ptr<MatchingComparator> comparator,
-      bool use_baeza_yates);
-
-  // For testing.
-  Intersector(
-      shared_ptr<Vocabulary> vocabulary,
-      shared_ptr<Precomputation> precomputation,
-      shared_ptr<SuffixArray> source_suffix_array,
-      shared_ptr<LinearMerger> linear_merger,
-      shared_ptr<BinarySearchMerger> binary_search_merger,
-      bool use_baeza_yates);
-
-  virtual ~Intersector();
-
-  virtual PhraseLocation Intersect(
-      const Phrase& prefix, PhraseLocation& prefix_location,
-      const Phrase& suffix, PhraseLocation& suffix_location,
-      const Phrase& phrase);
-
- protected:
-  Intersector();
-
- private:
-  void ConvertIndexes(shared_ptr<Precomputation> precomputation,
-                      shared_ptr<DataArray> data_array);
-
-  vector<int> ConvertPhrase(const vector<int>& old_phrase,
-                            shared_ptr<DataArray> data_array);
-
-  void ExtendPhraseLocation(const Phrase& phrase,
-      PhraseLocation& phrase_location);
-
-  shared_ptr<Vocabulary> vocabulary;
-  shared_ptr<SuffixArray> suffix_array;
-  shared_ptr<LinearMerger> linear_merger;
-  shared_ptr<BinarySearchMerger> binary_search_merger;
-  Index inverted_index;
-  Index collocations;
-  bool use_baeza_yates;
-
-  // TODO(pauldb): Don't forget to remove these.
- public:
-  double sort_time;
-  double linear_merge_time;
-  double binary_merge_time;
-};
-
-#endif
diff --git a/extractor/intersector_test.cc b/extractor/intersector_test.cc
deleted file mode 100644
index ec318362..00000000
--- a/extractor/intersector_test.cc
+++ /dev/null
@@ -1,193 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <memory>
-#include <vector>
-
-#include "intersector.h"
-#include "mocks/mock_binary_search_merger.h"
-#include "mocks/mock_data_array.h"
-#include "mocks/mock_linear_merger.h"
-#include "mocks/mock_precomputation.h"
-#include "mocks/mock_suffix_array.h"
-#include "mocks/mock_vocabulary.h"
-
-using namespace std;
-using namespace ::testing;
-
-namespace {
-
-class IntersectorTest : public Test {
- protected:
-  virtual void SetUp() {
-    data = {2, 3, 4, 3, 4, 3};
-    vector<string> words = {"a", "b", "c", "b", "c", "b"};
-    data_array = make_shared<MockDataArray>();
-    EXPECT_CALL(*data_array, GetData()).WillRepeatedly(ReturnRef(data));
-
-    vocabulary = make_shared<MockVocabulary>();
-    for (size_t i = 0; i < data.size(); ++i) {
-      EXPECT_CALL(*data_array, GetWord(data[i]))
-          .WillRepeatedly(Return(words[i]));
-      EXPECT_CALL(*vocabulary, GetTerminalIndex(words[i]))
-          .WillRepeatedly(Return(data[i]));
-      EXPECT_CALL(*vocabulary, GetTerminalValue(data[i]))
-          .WillRepeatedly(Return(words[i]));
-    }
-
-    vector<int> suffixes = {6, 0, 5, 3, 1, 4, 2};
-    suffix_array = make_shared<MockSuffixArray>();
-    EXPECT_CALL(*suffix_array, GetData())
-        .WillRepeatedly(Return(data_array));
-    EXPECT_CALL(*suffix_array, GetSize())
-        .WillRepeatedly(Return(suffixes.size()));
-    for (size_t i = 0; i < suffixes.size(); ++i) {
-      EXPECT_CALL(*suffix_array, GetSuffix(i))
-          .WillRepeatedly(Return(suffixes[i]));
-    }
-
-    vector<int> key = {2, -1, 4};
-    vector<int> values = {0, 2};
-    collocations[key] = values;
-    precomputation = make_shared<MockPrecomputation>();
-    EXPECT_CALL(*precomputation, GetInvertedIndex())
-       .WillRepeatedly(ReturnRef(inverted_index));
-    EXPECT_CALL(*precomputation, GetCollocations())
-       .WillRepeatedly(ReturnRef(collocations));
-
-    linear_merger = make_shared<MockLinearMerger>();
-    binary_search_merger = make_shared<MockBinarySearchMerger>();
-
-    phrase_builder = make_shared<PhraseBuilder>(vocabulary);
-  }
-
-  Index inverted_index;
-  Index collocations;
-  vector<int> data;
-  shared_ptr<MockVocabulary> vocabulary;
-  shared_ptr<MockDataArray> data_array;
-  shared_ptr<MockSuffixArray> suffix_array;
-  shared_ptr<MockPrecomputation> precomputation;
-  shared_ptr<MockLinearMerger> linear_merger;
-  shared_ptr<MockBinarySearchMerger> binary_search_merger;
-  shared_ptr<PhraseBuilder> phrase_builder;
-  shared_ptr<Intersector> intersector;
-};
-
-TEST_F(IntersectorTest, TestCachedCollocation) {
-  intersector = make_shared<Intersector>(vocabulary, precomputation,
-      suffix_array, linear_merger, binary_search_merger, false);
-
-  vector<int> prefix_symbols = {2, -1};
-  Phrase prefix = phrase_builder->Build(prefix_symbols);
-  vector<int> suffix_symbols = {-1, 4};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-  vector<int> symbols = {2, -1, 4};
-  Phrase phrase = phrase_builder->Build(symbols);
-  PhraseLocation prefix_locs(0, 1), suffix_locs(2, 3);
-
-  PhraseLocation result = intersector->Intersect(
-      prefix, prefix_locs, suffix, suffix_locs, phrase);
-
-  vector<int> expected_locs = {0, 2};
-  PhraseLocation expected_result(expected_locs, 2);
-
-  EXPECT_EQ(expected_result, result);
-  EXPECT_EQ(PhraseLocation(0, 1), prefix_locs);
-  EXPECT_EQ(PhraseLocation(2, 3), suffix_locs);
-}
-
-TEST_F(IntersectorTest, TestLinearMergeaXb) {
-  vector<int> prefix_symbols = {3, -1};
-  Phrase prefix = phrase_builder->Build(prefix_symbols);
-  vector<int> suffix_symbols = {-1, 4};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-  vector<int> symbols = {3, -1, 4};
-  Phrase phrase = phrase_builder->Build(symbols);
-  PhraseLocation prefix_locs(2, 5), suffix_locs(5, 7);
-
-  vector<int> ex_prefix_locs = {1, 3, 5};
-  PhraseLocation extended_prefix_locs(ex_prefix_locs, 1);
-  vector<int> ex_suffix_locs = {2, 4};
-  PhraseLocation extended_suffix_locs(ex_suffix_locs, 1);
-
-  vector<int> expected_locs = {1, 4};
-  EXPECT_CALL(*linear_merger, Merge(_, _, _, _, _, _, _, _, _))
-      .Times(1)
-      .WillOnce(SetArgReferee<0>(expected_locs));
-  EXPECT_CALL(*binary_search_merger, Merge(_, _, _, _, _, _, _, _, _)).Times(0);
-
-  intersector = make_shared<Intersector>(vocabulary, precomputation,
-      suffix_array, linear_merger, binary_search_merger, false);
-
-  PhraseLocation result = intersector->Intersect(
-      prefix, prefix_locs, suffix, suffix_locs, phrase);
-  PhraseLocation expected_result(expected_locs, 2);
-
-  EXPECT_EQ(expected_result, result);
-  EXPECT_EQ(extended_prefix_locs, prefix_locs);
-  EXPECT_EQ(extended_suffix_locs, suffix_locs);
-}
-
-TEST_F(IntersectorTest, TestBinarySearchMergeaXb) {
-  vector<int> prefix_symbols = {3, -1};
-  Phrase prefix = phrase_builder->Build(prefix_symbols);
-  vector<int> suffix_symbols = {-1, 4};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-  vector<int> symbols = {3, -1, 4};
-  Phrase phrase = phrase_builder->Build(symbols);
-  PhraseLocation prefix_locs(2, 5), suffix_locs(5, 7);
-
-  vector<int> ex_prefix_locs = {1, 3, 5};
-  PhraseLocation extended_prefix_locs(ex_prefix_locs, 1);
-  vector<int> ex_suffix_locs = {2, 4};
-  PhraseLocation extended_suffix_locs(ex_suffix_locs, 1);
-
-  vector<int> expected_locs = {1, 4};
-  EXPECT_CALL(*binary_search_merger, Merge(_, _, _, _, _, _, _, _, _))
-      .Times(1)
-      .WillOnce(SetArgReferee<0>(expected_locs));
-  EXPECT_CALL(*linear_merger, Merge(_, _, _, _, _, _, _, _, _)).Times(0);
-
-  intersector = make_shared<Intersector>(vocabulary, precomputation,
-      suffix_array, linear_merger, binary_search_merger, true);
-
-  PhraseLocation result = intersector->Intersect(
-      prefix, prefix_locs, suffix, suffix_locs, phrase);
-  PhraseLocation expected_result(expected_locs, 2);
-
-  EXPECT_EQ(expected_result, result);
-  EXPECT_EQ(extended_prefix_locs, prefix_locs);
-  EXPECT_EQ(extended_suffix_locs, suffix_locs);
-}
-
-TEST_F(IntersectorTest, TestMergeaXbXc) {
-  vector<int> prefix_symbols = {2, -1, 4, -1};
-  Phrase prefix = phrase_builder->Build(prefix_symbols);
-  vector<int> suffix_symbols = {-1, 4, -1, 4};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-  vector<int> symbols = {2, -1, 4, -1, 4};
-  Phrase phrase = phrase_builder->Build(symbols);
-
-  vector<int> ex_prefix_locs = {0, 2, 0, 4};
-  PhraseLocation extended_prefix_locs(ex_prefix_locs, 2);
-  vector<int> ex_suffix_locs = {2, 4};
-  PhraseLocation extended_suffix_locs(ex_suffix_locs, 2);
-  vector<int> expected_locs = {0, 2, 4};
-  EXPECT_CALL(*linear_merger, Merge(_, _, _, _, _, _, _, _, _))
-      .Times(1)
-      .WillOnce(SetArgReferee<0>(expected_locs));
-  EXPECT_CALL(*binary_search_merger, Merge(_, _, _, _, _, _, _, _, _)).Times(0);
-
-  intersector = make_shared<Intersector>(vocabulary, precomputation,
-      suffix_array, linear_merger, binary_search_merger, false);
-
-  PhraseLocation result = intersector->Intersect(
-      prefix, extended_prefix_locs, suffix, extended_suffix_locs, phrase);
-  PhraseLocation expected_result(expected_locs, 3);
-
-  EXPECT_EQ(expected_result, result);
-  EXPECT_EQ(ex_prefix_locs, *extended_prefix_locs.matchings);
-  EXPECT_EQ(ex_suffix_locs, *extended_suffix_locs.matchings);
-}
-
-}  // namespace
diff --git a/extractor/linear_merger.cc b/extractor/linear_merger.cc
deleted file mode 100644
index e7a32788..00000000
--- a/extractor/linear_merger.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-#include "linear_merger.h"
-
-#include <cmath>
-
-#include "data_array.h"
-#include "matching.h"
-#include "matching_comparator.h"
-#include "phrase.h"
-#include "phrase_location.h"
-#include "vocabulary.h"
-
-LinearMerger::LinearMerger(shared_ptr<Vocabulary> vocabulary,
-                           shared_ptr<DataArray> data_array,
-                           shared_ptr<MatchingComparator> comparator) :
-    vocabulary(vocabulary), data_array(data_array), comparator(comparator) {}
-
-LinearMerger::LinearMerger() {}
-
-LinearMerger::~LinearMerger() {}
-
-void LinearMerger::Merge(
-    vector<int>& locations, const Phrase& phrase, const Phrase& suffix,
-    vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
-    vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
-    int prefix_subpatterns, int suffix_subpatterns) {
-  int last_chunk_len = suffix.GetChunkLen(suffix.Arity());
-  bool offset = !vocabulary->IsTerminal(suffix.GetSymbol(0));
-
-  while (prefix_start != prefix_end) {
-    Matching left(prefix_start, prefix_subpatterns,
-        data_array->GetSentenceId(*prefix_start));
-
-    while (suffix_start != suffix_end) {
-      Matching right(suffix_start, suffix_subpatterns,
-          data_array->GetSentenceId(*suffix_start));
-      if (comparator->Compare(left, right, last_chunk_len, offset) > 0) {
-        suffix_start += suffix_subpatterns;
-      } else {
-        break;
-      }
-    }
-
-    int start_position = *prefix_start;
-    vector<int> :: iterator i = suffix_start;
-    while (prefix_start != prefix_end && *prefix_start == start_position) {
-      Matching left(prefix_start, prefix_subpatterns,
-          data_array->GetSentenceId(*prefix_start));
-
-      while (i != suffix_end) {
-        Matching right(i, suffix_subpatterns, data_array->GetSentenceId(*i));
-        int comparison = comparator->Compare(left, right, last_chunk_len,
-                                             offset);
-        if (comparison == 0) {
-          vector<int> merged = left.Merge(right, phrase.Arity() + 1);
-          locations.insert(locations.end(), merged.begin(), merged.end());
-        } else if (comparison < 0) {
-          break;
-        }
-        i += suffix_subpatterns;
-      }
-
-      prefix_start += prefix_subpatterns;
-    }
-  }
-}
diff --git a/extractor/linear_merger.h b/extractor/linear_merger.h
deleted file mode 100644
index c3c7111e..00000000
--- a/extractor/linear_merger.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _LINEAR_MERGER_H_
-#define _LINEAR_MERGER_H_
-
-#include <memory>
-#include <vector>
-
-using namespace std;
-
-class MatchingComparator;
-class Phrase;
-class PhraseLocation;
-class DataArray;
-class Vocabulary;
-
-class LinearMerger {
- public:
-  LinearMerger(shared_ptr<Vocabulary> vocabulary,
-               shared_ptr<DataArray> data_array,
-               shared_ptr<MatchingComparator> comparator);
-
-  virtual ~LinearMerger();
-
-  virtual void Merge(
-      vector<int>& locations, const Phrase& phrase, const Phrase& suffix,
-      vector<int>::iterator prefix_start, vector<int>::iterator prefix_end,
-      vector<int>::iterator suffix_start, vector<int>::iterator suffix_end,
-      int prefix_subpatterns, int suffix_subpatterns);
-
- protected:
-  LinearMerger();
-
- private:
-  shared_ptr<Vocabulary> vocabulary;
-  shared_ptr<DataArray> data_array;
-  shared_ptr<MatchingComparator> comparator;
-};
-
-#endif
diff --git a/extractor/linear_merger_test.cc b/extractor/linear_merger_test.cc
deleted file mode 100644
index a6963430..00000000
--- a/extractor/linear_merger_test.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <memory>
-
-#include "linear_merger.h"
-#include "matching_comparator.h"
-#include "mocks/mock_data_array.h"
-#include "mocks/mock_vocabulary.h"
-#include "phrase.h"
-#include "phrase_location.h"
-#include "phrase_builder.h"
-
-using namespace std;
-using namespace ::testing;
-
-namespace {
-
-class LinearMergerTest : public Test {
- protected:
-  virtual void SetUp() {
-    shared_ptr<MockVocabulary> vocabulary = make_shared<MockVocabulary>();
-    EXPECT_CALL(*vocabulary, GetTerminalValue(_))
-        .WillRepeatedly(Return("word"));
-
-    shared_ptr<MockDataArray> data_array = make_shared<MockDataArray>();
-    EXPECT_CALL(*data_array, GetSentenceId(_))
-        .WillRepeatedly(Return(1));
-
-    shared_ptr<MatchingComparator> comparator =
-        make_shared<MatchingComparator>(1, 20);
-
-    phrase_builder = make_shared<PhraseBuilder>(vocabulary);
-    linear_merger = make_shared<LinearMerger>(vocabulary, data_array,
-                                              comparator);
-  }
-
-  shared_ptr<LinearMerger> linear_merger;
-  shared_ptr<PhraseBuilder> phrase_builder;
-};
-
-TEST_F(LinearMergerTest, aXbTest) {
-  vector<int> locations;
-  // Encoding for him X it (see Adam's dissertation).
-  vector<int> symbols{1, -1, 2};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{-1, 2};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs{2, 6, 10, 15};
-  vector<int> suffix_locs{0, 4, 8, 13};
-
-  linear_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 1, 1);
-
-  vector<int> expected_locations{2, 4, 2, 8, 2, 13, 6, 8, 6, 13, 10, 13};
-  EXPECT_EQ(expected_locations, locations);
-}
-
-TEST_F(LinearMergerTest, aXbXcTest) {
-  vector<int> locations;
-  // Encoding for it X him X it (see Adam's dissertation).
-  vector<int> symbols{1, -1, 2, -2, 1};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{-1, 2, -2, 1};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs{0, 2, 0, 6, 0, 10, 4, 6, 4, 10, 4, 15, 8, 10, 8, 15,
-                          13, 15};
-  vector<int> suffix_locs{2, 4, 2, 8, 2, 13, 6, 8, 6, 13, 10, 13};
-
-  linear_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 2, 2);
-
-  vector<int> expected_locs{0, 2, 4, 0, 2, 8, 0, 2, 13, 0, 6, 8, 0, 6, 13, 0,
-                            10, 13, 4, 6, 8, 4, 6, 13, 4, 10, 13, 8, 10, 13};
-  EXPECT_EQ(expected_locs, locations);
-}
-
-TEST_F(LinearMergerTest, abXcXdTest) {
-  // Sentence: Anna has many many nuts and sour apples and juicy apples.
-  // Phrase: Anna has X and X apples.
-  vector<int> locations;
-  vector<int> symbols{1, 2, -1, 3, -2, 4};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{2, -1, 3, -2, 4};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs{1, 6, 1, 9};
-  vector<int> suffix_locs{2, 6, 8, 2, 6, 11, 2, 9, 11};
-
-  linear_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 2, 3);
-
-  vector<int> expected_locs{1, 6, 8, 1, 6, 11, 1, 9, 11};
-  EXPECT_EQ(expected_locs, locations);
-}
-
-TEST_F(LinearMergerTest, LargeTest) {
-  vector<int> locations;
-  vector<int> symbols{1, -1, 2};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{-1, 2};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs;
-  for (int i = 0; i < 100; ++i) {
-    prefix_locs.push_back(i * 20 + 1);
-  }
-  vector<int> suffix_locs;
-  for (int i = 0; i < 100; ++i) {
-    suffix_locs.push_back(i * 20 + 5);
-    suffix_locs.push_back(i * 20 + 13);
-  }
-
-  linear_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 1, 1);
-
-  EXPECT_EQ(400, locations.size());
-  for (int i = 0; i < 100; ++i) {
-    EXPECT_EQ(i * 20 + 1, locations[4 * i]);
-    EXPECT_EQ(i * 20 + 5, locations[4 * i + 1]);
-    EXPECT_EQ(i * 20 + 1, locations[4 * i + 2]);
-    EXPECT_EQ(i * 20 + 13, locations[4 * i + 3]);
-  }
-}
-
-TEST_F(LinearMergerTest, EmptyResultTest) {
-  vector<int> locations;
-  vector<int> symbols{1, -1, 2};
-  Phrase phrase = phrase_builder->Build(symbols);
-  vector<int> suffix_symbols{-1, 2};
-  Phrase suffix = phrase_builder->Build(suffix_symbols);
-
-  vector<int> prefix_locs;
-  for (int i = 0; i < 100; ++i) {
-    prefix_locs.push_back(i * 200 + 1);
-  }
-  vector<int> suffix_locs;
-  for (int i = 0; i < 100; ++i) {
-    suffix_locs.push_back(i * 200 + 101);
-  }
-
-  linear_merger->Merge(locations, phrase, suffix, prefix_locs.begin(),
-      prefix_locs.end(), suffix_locs.begin(), suffix_locs.end(), 1, 1);
-
-  EXPECT_EQ(0, locations.size());
-}
-
-}  // namespace
diff --git a/extractor/matching.cc b/extractor/matching.cc
deleted file mode 100644
index 16a3ed6f..00000000
--- a/extractor/matching.cc
+++ /dev/null
@@ -1,12 +0,0 @@
-#include "matching.h"
-
-Matching::Matching(vector<int>::iterator start, int len, int sentence_id) :
-    positions(start, start + len), sentence_id(sentence_id) {}
-
-vector<int> Matching::Merge(const Matching& other, int num_subpatterns) const {
-  vector<int> result = positions;
-  if (num_subpatterns > positions.size()) {
-    result.push_back(other.positions.back());
-  }
-  return result;
-}
diff --git a/extractor/matching.h b/extractor/matching.h
deleted file mode 100644
index 4c46559e..00000000
--- a/extractor/matching.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _MATCHING_H_
-#define _MATCHING_H_
-
-#include <memory>
-#include <vector>
-
-using namespace std;
-
-struct Matching {
-  Matching(vector<int>::iterator start, int len, int sentence_id);
-
-  vector<int> Merge(const Matching& other, int num_subpatterns) const;
-
-  vector<int> positions;
-  int sentence_id;
-};
-
-#endif
diff --git a/extractor/matching_comparator.cc b/extractor/matching_comparator.cc
deleted file mode 100644
index 03db95c0..00000000
--- a/extractor/matching_comparator.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-#include "matching_comparator.h"
-
-#include "matching.h"
-#include "vocabulary.h"
-
-MatchingComparator::MatchingComparator(int min_gap_size, int max_rule_span) :
-    min_gap_size(min_gap_size), max_rule_span(max_rule_span) {}
-
-int MatchingComparator::Compare(const Matching& left,
-                                const Matching& right,
-                                int last_chunk_len,
-                                bool offset) const {
-  if (left.sentence_id != right.sentence_id) {
-    return left.sentence_id < right.sentence_id ? -1 : 1;
-  }
-
-  if (left.positions.size() == 1 && right.positions.size() == 1) {
-    // The prefix and the suffix must be separated by a non-terminal, otherwise
-    // we would be doing a suffix array lookup.
-    if (right.positions[0] - left.positions[0] <= min_gap_size) {
-      return 1;
-    }
-  } else if (offset) {
-    for (size_t i = 1; i < left.positions.size(); ++i) {
-      if (left.positions[i] != right.positions[i - 1]) {
-        return left.positions[i] < right.positions[i - 1] ? -1 : 1;
-      }
-    }
-  } else {
-    if (left.positions[0] + 1 != right.positions[0]) {
-      return left.positions[0] + 1 < right.positions[0] ? -1 : 1;
-    }
-    for (size_t i = 1; i < left.positions.size(); ++i) {
-      if (left.positions[i] != right.positions[i]) {
-        return left.positions[i] < right.positions[i] ? -1 : 1;
-      }
-    }
-  }
-
-  if (right.positions.back() + last_chunk_len - left.positions.front() >
-      max_rule_span) {
-    return -1;
-  }
-
-  return 0;
-}
diff --git a/extractor/matching_comparator.h b/extractor/matching_comparator.h
deleted file mode 100644
index 6e1bb487..00000000
--- a/extractor/matching_comparator.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _MATCHING_COMPARATOR_H_
-#define _MATCHING_COMPARATOR_H_
-
-#include <memory>
-
-using namespace std;
-
-class Vocabulary;
-class Matching;
-
-class MatchingComparator {
- public:
-  MatchingComparator(int min_gap_size, int max_rule_span);
-
-  int Compare(const Matching& left, const Matching& right,
-              int last_chunk_len, bool offset) const;
-
- private:
-  int min_gap_size;
-  int max_rule_span;
-};
-
-#endif
diff --git a/extractor/matching_comparator_test.cc b/extractor/matching_comparator_test.cc
deleted file mode 100644
index b8f898cf..00000000
--- a/extractor/matching_comparator_test.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-#include <gtest/gtest.h>
-
-#include "matching.h"
-#include "matching_comparator.h"
-
-using namespace ::testing;
-
-namespace {
-
-class MatchingComparatorTest : public Test {
- protected:
-  virtual void SetUp() {
-    comparator = make_shared<MatchingComparator>(1, 20);
-  }
-
-  shared_ptr<MatchingComparator> comparator;
-};
-
-TEST_F(MatchingComparatorTest, SmallerSentenceId) {
-  vector<int> left_locations{1};
-  Matching left(left_locations.begin(), 1, 1);
-  vector<int> right_locations{100};
-  Matching right(right_locations.begin(), 1, 5);
-  EXPECT_EQ(-1, comparator->Compare(left, right, 1, true));
-}
-
-TEST_F(MatchingComparatorTest, GreaterSentenceId) {
-  vector<int> left_locations{100};
-  Matching left(left_locations.begin(), 1, 5);
-  vector<int> right_locations{1};
-  Matching right(right_locations.begin(), 1, 1);
-  EXPECT_EQ(1, comparator->Compare(left, right, 1, true));
-}
-
-TEST_F(MatchingComparatorTest, SmalleraXb) {
-  vector<int> left_locations{1};
-  Matching left(left_locations.begin(), 1, 1);
-  vector<int> right_locations{21};
-  Matching right(right_locations.begin(), 1, 1);
-  // The matching exceeds the max rule span.
-  EXPECT_EQ(-1, comparator->Compare(left, right, 1, true));
-}
-
-TEST_F(MatchingComparatorTest, EqualaXb) {
-  vector<int> left_locations{1};
-  Matching left(left_locations.begin(), 1, 1);
-  vector<int> lower_right_locations{3};
-  Matching right(lower_right_locations.begin(), 1, 1);
-  EXPECT_EQ(0, comparator->Compare(left, right, 1, true));
-
-  vector<int> higher_right_locations{20};
-  right = Matching(higher_right_locations.begin(), 1, 1);
-  EXPECT_EQ(0, comparator->Compare(left, right, 1, true));
-}
-
-TEST_F(MatchingComparatorTest, GreateraXb) {
-  vector<int> left_locations{1};
-  Matching left(left_locations.begin(), 1, 1);
-  vector<int> right_locations{2};
-  Matching right(right_locations.begin(), 1, 1);
-  // The gap between the prefix and the suffix is of size 0, less than the
-  // min gap size.
-  EXPECT_EQ(1, comparator->Compare(left, right, 1, true));
-}
-
-TEST_F(MatchingComparatorTest, SmalleraXbXc) {
-  vector<int> left_locations{1, 3};
-  Matching left(left_locations.begin(), 2, 1);
-  vector<int> right_locations{4, 6};
-  // The common part doesn't match.
-  Matching right(right_locations.begin(), 2, 1);
-  EXPECT_EQ(-1, comparator->Compare(left, right, 1, true));
-
-  // The common part matches, but the rule exceeds the max span.
-  vector<int> other_right_locations{3, 21};
-  right = Matching(other_right_locations.begin(), 2, 1);
-  EXPECT_EQ(-1, comparator->Compare(left, right, 1, true));
-}
-
-TEST_F(MatchingComparatorTest, EqualaXbXc) {
-  vector<int> left_locations{1, 3};
-  Matching left(left_locations.begin(), 2, 1);
-  vector<int> right_locations{3, 5};
-  // The leftmost possible match.
-  Matching right(right_locations.begin(), 2, 1);
-  EXPECT_EQ(0, comparator->Compare(left, right, 1, true));
-
-  // The rightmost possible match.
-  vector<int> other_right_locations{3, 20};
-  right = Matching(other_right_locations.begin(), 2, 1);
-  EXPECT_EQ(0, comparator->Compare(left, right, 1, true));
-}
-
-TEST_F(MatchingComparatorTest, GreateraXbXc) {
-  vector<int> left_locations{1, 4};
-  Matching left(left_locations.begin(), 2, 1);
-  vector<int> right_locations{3, 5};
-  // The common part doesn't match.
-  Matching right(right_locations.begin(), 2, 1);
-  EXPECT_EQ(1, comparator->Compare(left, right, 1, true));
-}
-
-TEST_F(MatchingComparatorTest, SmallerabXcXd) {
-  vector<int> left_locations{9, 13};
-  Matching left(left_locations.begin(), 2, 1);
-  // The suffix doesn't start on the next position.
-  vector<int> right_locations{11, 13, 15};
-  Matching right(right_locations.begin(), 3, 1);
-  EXPECT_EQ(-1, comparator->Compare(left, right, 1, false));
-
-  // The common part doesn't match.
-  vector<int> other_right_locations{10, 16, 18};
-  right = Matching(other_right_locations.begin(), 3, 1);
-  EXPECT_EQ(-1, comparator->Compare(left, right, 1, false));
-}
-
-TEST_F(MatchingComparatorTest, EqualabXcXd) {
-  vector<int> left_locations{10, 13};
-  Matching left(left_locations.begin(), 2, 1);
-  vector<int> right_locations{11, 13, 15};
-  Matching right(right_locations.begin(), 3, 1);
-  EXPECT_EQ(0, comparator->Compare(left, right, 1, false));
-}
-
-TEST_F(MatchingComparatorTest, GreaterabXcXd) {
-  vector<int> left_locations{9, 15};
-  Matching left(left_locations.begin(), 2, 1);
-  // The suffix doesn't start on the next position.
-  vector<int> right_locations{7, 15, 17};
-  Matching right(right_locations.begin(), 3, 1);
-  EXPECT_EQ(1, comparator->Compare(left, right, 1, false));
-
-  // The common part doesn't match.
-  vector<int> other_right_locations{10, 13, 16};
-  right = Matching(other_right_locations.begin(), 3, 1);
-  EXPECT_EQ(1, comparator->Compare(left, right, 1, false));
-}
-
-}  // namespace
diff --git a/extractor/matching_test.cc b/extractor/matching_test.cc
deleted file mode 100644
index 9593aa86..00000000
--- a/extractor/matching_test.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <vector>
-
-#include "matching.h"
-
-using namespace std;
-
-namespace {
-
-TEST(MatchingTest, SameSize) {
-  vector<int> positions{1, 2, 3};
-  Matching left(positions.begin(), positions.size(), 0);
-  Matching right(positions.begin(), positions.size(), 0);
-  EXPECT_EQ(positions, left.Merge(right, positions.size()));
-}
-
-TEST(MatchingTest, DifferentSize) {
-  vector<int> positions{1, 2, 3};
-  Matching left(positions.begin(), positions.size() - 1, 0);
-  Matching right(positions.begin() + 1, positions.size() - 1, 0);
-  vector<int> result = left.Merge(right, positions.size());
-}
-
-}  // namespace
diff --git a/extractor/mocks/mock_binary_search_merger.h b/extractor/mocks/mock_binary_search_merger.h
deleted file mode 100644
index e23386f0..00000000
--- a/extractor/mocks/mock_binary_search_merger.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <gmock/gmock.h>
-
-#include <vector>
-
-#include "../binary_search_merger.h"
-#include "../phrase.h"
-
-using namespace std;
-
-class MockBinarySearchMerger: public BinarySearchMerger {
- public:
-  MOCK_CONST_METHOD9(Merge, void(vector<int>&, const Phrase&, const Phrase&,
-      const vector<int>::iterator&, const vector<int>::iterator&,
-      const vector<int>::iterator&, const vector<int>::iterator&, int, int));
-};
diff --git a/extractor/mocks/mock_intersector.h b/extractor/mocks/mock_intersector.h
deleted file mode 100644
index 372fa7ea..00000000
--- a/extractor/mocks/mock_intersector.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#include <gmock/gmock.h>
-
-#include "../intersector.h"
-#include "../phrase.h"
-#include "../phrase_location.h"
-
-class MockIntersector : public Intersector {
- public:
-  MOCK_METHOD5(Intersect, PhraseLocation(const Phrase&, PhraseLocation&,
-      const Phrase&, PhraseLocation&, const Phrase&));
-};
diff --git a/extractor/mocks/mock_linear_merger.h b/extractor/mocks/mock_linear_merger.h
deleted file mode 100644
index 522c1f31..00000000
--- a/extractor/mocks/mock_linear_merger.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <gmock/gmock.h>
-
-#include <vector>
-
-#include "../linear_merger.h"
-#include "../phrase.h"
-
-using namespace std;
-
-class MockLinearMerger: public LinearMerger {
- public:
-  MOCK_METHOD9(Merge, void(vector<int>&, const Phrase&, const Phrase&,
-      vector<int>::iterator, vector<int>::iterator, vector<int>::iterator,
-      vector<int>::iterator, int, int));
-};
diff --git a/extractor/rule_factory.cc b/extractor/rule_factory.cc
index 4101fcfa..e8b93f83 100644
--- a/extractor/rule_factory.cc
+++ b/extractor/rule_factory.cc
@@ -7,9 +7,7 @@
 
 #include "grammar.h"
 #include "fast_intersector.h"
-#include "intersector.h"
 #include "matchings_finder.h"
-#include "matching_comparator.h"
 #include "phrase.h"
 #include "rule.h"
 #include "rule_extractor.h"
@@ -50,8 +48,6 @@ HieroCachingRuleFactory::HieroCachingRuleFactory(
     int max_nonterminals,
     int max_rule_symbols,
     int max_samples,
-    bool use_fast_intersect,
-    bool use_baeza_yates,
     bool require_tight_phrases) :
     vocabulary(vocabulary),
     scorer(scorer),
@@ -59,13 +55,8 @@ HieroCachingRuleFactory::HieroCachingRuleFactory(
     max_rule_span(max_rule_span),
     max_nonterminals(max_nonterminals),
     max_chunks(max_nonterminals + 1),
-    max_rule_symbols(max_rule_symbols),
-    use_fast_intersect(use_fast_intersect) {
+    max_rule_symbols(max_rule_symbols) {
   matchings_finder = make_shared<MatchingsFinder>(source_suffix_array);
-  shared_ptr<MatchingComparator> comparator =
-      make_shared<MatchingComparator>(min_gap_size, max_rule_span);
-  intersector = make_shared<Intersector>(vocabulary, precomputation,
-      source_suffix_array, comparator, use_baeza_yates);
   fast_intersector = make_shared<FastIntersector>(source_suffix_array,
       precomputation, vocabulary, max_rule_span, min_gap_size);
   phrase_builder = make_shared<PhraseBuilder>(vocabulary);
@@ -78,7 +69,6 @@ HieroCachingRuleFactory::HieroCachingRuleFactory(
 
 HieroCachingRuleFactory::HieroCachingRuleFactory(
     shared_ptr<MatchingsFinder> finder,
-    shared_ptr<Intersector> intersector,
     shared_ptr<FastIntersector> fast_intersector,
     shared_ptr<PhraseBuilder> phrase_builder,
     shared_ptr<RuleExtractor> rule_extractor,
@@ -89,10 +79,8 @@ HieroCachingRuleFactory::HieroCachingRuleFactory(
     int max_rule_span,
     int max_nonterminals,
     int max_chunks,
-    int max_rule_symbols,
-    bool use_fast_intersect) :
+    int max_rule_symbols) :
     matchings_finder(finder),
-    intersector(intersector),
     fast_intersector(fast_intersector),
     phrase_builder(phrase_builder),
     rule_extractor(rule_extractor),
@@ -103,15 +91,13 @@ HieroCachingRuleFactory::HieroCachingRuleFactory(
     max_rule_span(max_rule_span),
     max_nonterminals(max_nonterminals),
     max_chunks(max_chunks),
-    max_rule_symbols(max_rule_symbols),
-    use_fast_intersect(use_fast_intersect) {}
+    max_rule_symbols(max_rule_symbols) {}
 
 HieroCachingRuleFactory::HieroCachingRuleFactory() {}
 
 HieroCachingRuleFactory::~HieroCachingRuleFactory() {}
 
 Grammar HieroCachingRuleFactory::GetGrammar(const vector<int>& word_ids) {
-  intersector->sort_time = 0;
   Clock::time_point start_time = Clock::now();
   double total_extract_time = 0;
   double total_intersect_time = 0;
@@ -164,17 +150,8 @@ Grammar HieroCachingRuleFactory::GetGrammar(const vector<int>& word_ids) {
         PhraseLocation phrase_location;
         if (next_phrase.Arity() > 0) {
           Clock::time_point intersect_start = Clock::now();
-          if (use_fast_intersect) {
-            phrase_location = fast_intersector->Intersect(
-                node->matchings, next_suffix_link->matchings, next_phrase);
-          } else {
-            phrase_location = intersector->Intersect(
-                node->phrase,
-                node->matchings,
-                next_suffix_link->phrase,
-                next_suffix_link->matchings,
-                next_phrase);
-          }
+          phrase_location = fast_intersector->Intersect(
+              node->matchings, next_suffix_link->matchings, next_phrase);
           Clock::time_point intersect_stop = Clock::now();
           total_intersect_time += GetDuration(intersect_start, intersect_stop);
         } else {
diff --git a/extractor/rule_factory.h b/extractor/rule_factory.h
index a39386a8..2d1c7f5a 100644
--- a/extractor/rule_factory.h
+++ b/extractor/rule_factory.h
@@ -14,7 +14,6 @@ class DataArray;
 class Grammar;
 class MatchingsFinder;
 class FastIntersector;
-class Intersector;
 class Precomputation;
 class Rule;
 class RuleExtractor;
@@ -38,14 +37,11 @@ class HieroCachingRuleFactory {
       int max_nonterminals,
       int max_rule_symbols,
       int max_samples,
-      bool use_fast_intersect,
-      bool use_beaza_yates,
       bool require_tight_phrases);
 
   // For testing only.
   HieroCachingRuleFactory(
       shared_ptr<MatchingsFinder> finder,
-      shared_ptr<Intersector> intersector,
       shared_ptr<FastIntersector> fast_intersector,
       shared_ptr<PhraseBuilder> phrase_builder,
       shared_ptr<RuleExtractor> rule_extractor,
@@ -56,8 +52,7 @@ class HieroCachingRuleFactory {
       int max_rule_span,
       int max_nonterminals,
       int max_chunks,
-      int max_rule_symbols,
-      bool use_fast_intersect);
+      int max_rule_symbols);
 
   virtual ~HieroCachingRuleFactory();
 
@@ -83,7 +78,6 @@ class HieroCachingRuleFactory {
                             const shared_ptr<TrieNode>& node);
 
   shared_ptr<MatchingsFinder> matchings_finder;
-  shared_ptr<Intersector> intersector;
   shared_ptr<FastIntersector> fast_intersector;
   MatchingsTrie trie;
   shared_ptr<PhraseBuilder> phrase_builder;
@@ -96,7 +90,6 @@ class HieroCachingRuleFactory {
   int max_nonterminals;
   int max_chunks;
   int max_rule_symbols;
-  bool use_fast_intersect;
 };
 
 #endif
diff --git a/extractor/rule_factory_test.cc b/extractor/rule_factory_test.cc
index d329382a..fc709461 100644
--- a/extractor/rule_factory_test.cc
+++ b/extractor/rule_factory_test.cc
@@ -6,7 +6,6 @@
 
 #include "grammar.h"
 #include "mocks/mock_fast_intersector.h"
-#include "mocks/mock_intersector.h"
 #include "mocks/mock_matchings_finder.h"
 #include "mocks/mock_rule_extractor.h"
 #include "mocks/mock_sampler.h"
@@ -25,7 +24,6 @@ class RuleFactoryTest : public Test {
  protected:
   virtual void SetUp() {
     finder = make_shared<MockMatchingsFinder>();
-    intersector = make_shared<MockIntersector>();
     fast_intersector = make_shared<MockFastIntersector>();
 
     vocabulary = make_shared<MockVocabulary>();
@@ -55,7 +53,6 @@ class RuleFactoryTest : public Test {
 
   vector<string> feature_names;
   shared_ptr<MockMatchingsFinder> finder;
-  shared_ptr<MockIntersector> intersector;
   shared_ptr<MockFastIntersector> fast_intersector;
   shared_ptr<MockVocabulary> vocabulary;
   shared_ptr<PhraseBuilder> phrase_builder;
@@ -66,81 +63,39 @@ class RuleFactoryTest : public Test {
 };
 
 TEST_F(RuleFactoryTest, TestGetGrammarDifferentWords) {
-  factory = make_shared<HieroCachingRuleFactory>(finder, intersector,
-      fast_intersector, phrase_builder, extractor, vocabulary, sampler,
-      scorer, 1, 10, 2, 3, 5, false);
+  factory = make_shared<HieroCachingRuleFactory>(finder, fast_intersector,
+      phrase_builder, extractor, vocabulary, sampler, scorer, 1, 10, 2, 3, 5);
 
   EXPECT_CALL(*finder, Find(_, _, _))
       .Times(6)
       .WillRepeatedly(Return(PhraseLocation(0, 1)));
 
-  EXPECT_CALL(*intersector, Intersect(_, _, _, _, _))
+  EXPECT_CALL(*fast_intersector, Intersect(_, _, _))
       .Times(1)
       .WillRepeatedly(Return(PhraseLocation(0, 1)));
-  EXPECT_CALL(*fast_intersector, Intersect(_, _, _)).Times(0);
 
   vector<int> word_ids = {2, 3, 4};
   Grammar grammar = factory->GetGrammar(word_ids);
   EXPECT_EQ(feature_names, grammar.GetFeatureNames());
   EXPECT_EQ(7, grammar.GetRules().size());
-
-  // Test for fast intersector.
-  factory = make_shared<HieroCachingRuleFactory>(finder, intersector,
-      fast_intersector, phrase_builder, extractor, vocabulary, sampler,
-      scorer, 1, 10, 2, 3, 5, true);
-
-  EXPECT_CALL(*finder, Find(_, _, _))
-      .Times(6)
-      .WillRepeatedly(Return(PhraseLocation(0, 1)));
-
-  EXPECT_CALL(*fast_intersector, Intersect(_, _, _))
-      .Times(1)
-      .WillRepeatedly(Return(PhraseLocation(0, 1)));
-  EXPECT_CALL(*intersector, Intersect(_, _, _, _, _)).Times(0);
-
-  grammar = factory->GetGrammar(word_ids);
-  EXPECT_EQ(feature_names, grammar.GetFeatureNames());
-  EXPECT_EQ(7, grammar.GetRules().size());
 }
 
 TEST_F(RuleFactoryTest, TestGetGrammarRepeatingWords) {
-  factory = make_shared<HieroCachingRuleFactory>(finder, intersector,
-      fast_intersector, phrase_builder, extractor, vocabulary, sampler,
-      scorer, 1, 10, 2, 3, 5, false);
+  factory = make_shared<HieroCachingRuleFactory>(finder, fast_intersector,
+      phrase_builder, extractor, vocabulary, sampler, scorer, 1, 10, 2, 3, 5);
 
   EXPECT_CALL(*finder, Find(_, _, _))
       .Times(12)
       .WillRepeatedly(Return(PhraseLocation(0, 1)));
 
-  EXPECT_CALL(*intersector, Intersect(_, _, _, _, _))
+  EXPECT_CALL(*fast_intersector, Intersect(_, _, _))
       .Times(16)
       .WillRepeatedly(Return(PhraseLocation(0, 1)));
 
-  EXPECT_CALL(*fast_intersector, Intersect(_, _, _)).Times(0);
-
   vector<int> word_ids = {2, 3, 4, 2, 3};
   Grammar grammar = factory->GetGrammar(word_ids);
   EXPECT_EQ(feature_names, grammar.GetFeatureNames());
   EXPECT_EQ(28, grammar.GetRules().size());
-
-  // Test for fast intersector.
-  factory = make_shared<HieroCachingRuleFactory>(finder, intersector,
-      fast_intersector, phrase_builder, extractor, vocabulary, sampler,
-      scorer, 1, 10, 2, 3, 5, true);
-
-  EXPECT_CALL(*finder, Find(_, _, _))
-      .Times(12)
-      .WillRepeatedly(Return(PhraseLocation(0, 1)));
-
-  EXPECT_CALL(*fast_intersector, Intersect(_, _, _))
-      .Times(16)
-      .WillRepeatedly(Return(PhraseLocation(0, 1)));
-
-  EXPECT_CALL(*intersector, Intersect(_, _, _, _, _)).Times(0);
-
-  grammar = factory->GetGrammar(word_ids);
-  EXPECT_EQ(feature_names, grammar.GetFeatureNames());
-  EXPECT_EQ(28, grammar.GetRules().size());
 }
 
 }  // namespace
diff --git a/extractor/run_extractor.cc b/extractor/run_extractor.cc
index 38f10a5f..d73f77cd 100644
--- a/extractor/run_extractor.cc
+++ b/extractor/run_extractor.cc
@@ -58,13 +58,9 @@ int main(int argc, char** argv) {
         "Minimum number of occurences for a pharse to be considered frequent")
     ("max_samples", po::value<int>()->default_value(300),
         "Maximum number of samples")
-    ("fast_intersect", po::value<bool>()->default_value(false),
-        "Enable fast intersect")
     // TODO(pauldb): Check if this works when set to false.
     ("tight_phrases", po::value<bool>()->default_value(true),
-        "False if phrases may be loose (better, but slower)")
-    ("baeza_yates", po::value<bool>()->default_value(true),
-        "Use double binary search");
+        "False if phrases may be loose (better, but slower)");
 
   po::variables_map vm;
   po::store(po::parse_command_line(argc, argv, desc), vm);
@@ -170,8 +166,6 @@ int main(int argc, char** argv) {
       vm["max_nonterminals"].as<int>(),
       vm["max_rule_symbols"].as<int>(),
       vm["max_samples"].as<int>(),
-      vm["fast_intersect"].as<bool>(),
-      vm["baeza_yates"].as<bool>(),
       vm["tight_phrases"].as<bool>());
 
   int grammar_id = 0;
diff --git a/extractor/veb.cc b/extractor/veb.cc
deleted file mode 100644
index f38f672e..00000000
--- a/extractor/veb.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-#include "veb.h"
-
-#include "veb_bitset.h"
-#include "veb_tree.h"
-
-int VEB::MIN_BOTTOM_BITS = 5;
-int VEB::MIN_BOTTOM_SIZE = 1 << VEB::MIN_BOTTOM_BITS;
-
-shared_ptr<VEB> VEB::Create(int size) {
-  if (size > MIN_BOTTOM_SIZE) {
-    return shared_ptr<VEB>(new VEBTree(size));
-  } else {
-    return shared_ptr<VEB>(new VEBBitset(size));
-  }
-}
-
-int VEB::GetMinimum() {
-  return min;
-}
-
-int VEB::GetMaximum() {
-  return max;
-}
-
-VEB::VEB(int min, int max) : min(min), max(max) {}
diff --git a/extractor/veb.h b/extractor/veb.h
deleted file mode 100644
index c8209cf7..00000000
--- a/extractor/veb.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _VEB_H_
-#define _VEB_H_
-
-#include <memory>
-
-using namespace std;
-
-class VEB {
- public:
-  static shared_ptr<VEB> Create(int size);
-
-  virtual void Insert(int value) = 0;
-
-  virtual int GetSuccessor(int value) = 0;
-
-  int GetMinimum();
-
-  int GetMaximum();
-
-  static int MIN_BOTTOM_BITS;
-  static int MIN_BOTTOM_SIZE;
-
- protected:
-  VEB(int min = -1, int max = -1);
-
-  int min, max;
-};
-
-#endif
diff --git a/extractor/veb_bitset.cc b/extractor/veb_bitset.cc
deleted file mode 100644
index 4e364cc5..00000000
--- a/extractor/veb_bitset.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-#include "veb_bitset.h"
-
-using namespace std;
-
-VEBBitset::VEBBitset(int size) : bitset(size) {
-  min = max = -1;
-}
-
-void VEBBitset::Insert(int value) {
-  bitset[value] = 1;
-  if (min == -1 || value < min) {
-    min = value;
-  }
-  if (max == - 1 || value > max) {
-    max = value;
-  }
-}
-
-int VEBBitset::GetSuccessor(int value) {
-  int next_value = bitset.find_next(value);
-  if (next_value == bitset.npos) {
-    return -1;
-  }
-  return next_value;
-}
diff --git a/extractor/veb_bitset.h b/extractor/veb_bitset.h
deleted file mode 100644
index f8a91234..00000000
--- a/extractor/veb_bitset.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _VEB_BITSET_H_
-#define _VEB_BITSET_H_
-
-#include <boost/dynamic_bitset.hpp>
-
-#include "veb.h"
-
-class VEBBitset: public VEB {
- public:
-  VEBBitset(int size);
-
-  void Insert(int value);
-
-  int GetMinimum();
-
-  int GetSuccessor(int value);
-
- private:
-  boost::dynamic_bitset<> bitset;
-};
-
-#endif
diff --git a/extractor/veb_test.cc b/extractor/veb_test.cc
deleted file mode 100644
index c40c9f28..00000000
--- a/extractor/veb_test.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-#include <gtest/gtest.h>
-
-#include <algorithm>
-#include <vector>
-
-#include "veb.h"
-
-using namespace std;
-
-namespace {
-
-class VEBTest : public ::testing::Test {
- protected:
-  void VEBSortTester(vector<int> values, int max_value) {
-    shared_ptr<VEB> veb = VEB::Create(max_value);
-    for (int value: values) {
-      veb->Insert(value);
-    }
-
-    sort(values.begin(), values.end());
-    EXPECT_EQ(values.front(), veb->GetMinimum());
-    EXPECT_EQ(values.back(), veb->GetMaximum());
-    for (size_t i = 0; i + 1 < values.size(); ++i) {
-      EXPECT_EQ(values[i + 1], veb->GetSuccessor(values[i]));
-    }
-    EXPECT_EQ(-1, veb->GetSuccessor(values.back()));
-  }
-};
-
-TEST_F(VEBTest, SmallRange) {
-  vector<int> values{8, 13, 5, 1, 4, 15, 2, 10, 6, 7};
-  VEBSortTester(values, 16);
-}
-
-TEST_F(VEBTest, MediumRange) {
-  vector<int> values{167, 243, 88, 12, 137, 199, 212, 45, 150, 189};
-  VEBSortTester(values, 255);
-}
-
-TEST_F(VEBTest, LargeRangeSparse) {
-  vector<int> values;
-  for (size_t i = 0; i < 100; ++i) {
-    values.push_back(i * 1000000);
-  }
-  VEBSortTester(values, 100000000);
-}
-
-TEST_F(VEBTest, LargeRangeDense) {
-  vector<int> values;
-  for (size_t i = 0; i < 1000000; ++i) {
-    values.push_back(i);
-  }
-  VEBSortTester(values, 1000000);
-}
-
-}  // namespace
diff --git a/extractor/veb_tree.cc b/extractor/veb_tree.cc
deleted file mode 100644
index f8945445..00000000
--- a/extractor/veb_tree.cc
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <cmath>
-
-#include "veb_tree.h"
-
-VEBTree::VEBTree(int size) {
-  int num_bits = ceil(log2(size));
-
-  lower_bits = num_bits >> 1;
-  upper_size = (size >> lower_bits) + 1;
-
-  clusters.reserve(upper_size);
-  clusters.resize(upper_size);
-}
-
-int VEBTree::GetNextValue(int value) {
-  return value & ((1 << lower_bits) - 1);
-}
-
-int VEBTree::GetCluster(int value) {
-  return value >> lower_bits;
-}
-
-int VEBTree::Compose(int cluster, int value) {
-  return (cluster << lower_bits) + value;
-}
-
-void VEBTree::Insert(int value) {
-  if (min == -1 && max == -1) {
-    min = max = value;
-    return;
-  }
-
-  if (value < min) {
-    swap(min, value);
-  }
-
-  int cluster = GetCluster(value), next_value = GetNextValue(value);
-  if (clusters[cluster] == NULL) {
-    clusters[cluster] = VEB::Create(1 << lower_bits);
-    if (summary == NULL) {
-      summary = VEB::Create(upper_size);
-    }
-    summary->Insert(cluster);
-  }
-  clusters[cluster]->Insert(next_value);
-
-  if (value > max) {
-    max = value;
-  }
-}
-
-int VEBTree::GetSuccessor(int value) {
-  if (value >= max) {
-    return -1;
-  }
-  if (value < min) {
-    return min;
-  }
-
-  int cluster = GetCluster(value), next_value = GetNextValue(value);
-  if (clusters[cluster] != NULL &&
-      next_value < clusters[cluster]->GetMaximum()) {
-    return Compose(cluster, clusters[cluster]->GetSuccessor(next_value));
-  } else {
-    int next_cluster = summary->GetSuccessor(cluster);
-    if (next_cluster == -1) {
-      return -1;
-    }
-    return Compose(next_cluster, clusters[next_cluster]->GetMinimum());
-  }
-}
diff --git a/extractor/veb_tree.h b/extractor/veb_tree.h
deleted file mode 100644
index 578d3e6a..00000000
--- a/extractor/veb_tree.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _VEB_TREE_H_
-#define _VEB_TREE_H_
-
-#include <memory>
-#include <vector>
-
-using namespace std;
-
-#include "veb.h"
-
-class VEBTree: public VEB {
- public:
-  VEBTree(int size);
-
-  void Insert(int value);
-
-  int GetSuccessor(int value);
-
- private:
-  int GetNextValue(int value);
-  int GetCluster(int value);
-  int Compose(int cluster, int value);
-
-  int lower_bits, upper_size;
-  shared_ptr<VEB> summary;
-  vector<shared_ptr<VEB> > clusters;
-};
-
-#endif
author	Paul Baltescu <pauldb89@gmail.com>	2013-02-22 11:59:17 +0000
committer	Paul Baltescu <pauldb89@gmail.com>	2013-02-22 11:59:17 +0000
commit	d69c289e172562039bcbe987657280332ab6315e (patch)
tree	cc67d97bf802101d981cfacadb1cea80472aa107 /extractor
parent	a21835881984ba90f303c7b1681d295678cb7131 (diff)