diff options
Diffstat (limited to 'extractor')
| -rw-r--r-- | extractor/fast_intersector.cc | 11 | ||||
| -rw-r--r-- | extractor/fast_intersector_test.cc | 8 | ||||
| -rw-r--r-- | extractor/mocks/mock_precomputation.h | 2 | ||||
| -rw-r--r-- | extractor/precomputation.cc | 138 | ||||
| -rw-r--r-- | extractor/precomputation.h | 49 | ||||
| -rw-r--r-- | extractor/precomputation_test.cc | 143 | 
6 files changed, 152 insertions, 199 deletions
diff --git a/extractor/fast_intersector.cc b/extractor/fast_intersector.cc index 5360c1da..a8591a72 100644 --- a/extractor/fast_intersector.cc +++ b/extractor/fast_intersector.cc @@ -20,13 +20,10 @@ FastIntersector::FastIntersector(shared_ptr<SuffixArray> suffix_array,      vocabulary(vocabulary),      max_rule_span(max_rule_span),      min_gap_size(min_gap_size) { -  auto precomputed_collocations = precomputation->GetCollocations(); -  for (auto item: precomputed_collocations) { -    vector<int> phrase = ConvertPhrase(item.first); -    vector<int> location = item.second; -    vector<int>& phrase_collocations = collocations[phrase]; -    phrase_collocations.insert(phrase_collocations.end(), location.begin(), -                               location.end()); +  Index precomputed_collocations = precomputation->GetCollocations(); +  for (pair<vector<int>, vector<int>> entry: precomputed_collocations) { +    vector<int> phrase = ConvertPhrase(entry.first); +    collocations[phrase] = entry.second;    }  } diff --git a/extractor/fast_intersector_test.cc b/extractor/fast_intersector_test.cc index 2e618b63..76c3aaea 100644 --- a/extractor/fast_intersector_test.cc +++ b/extractor/fast_intersector_test.cc @@ -60,14 +60,14 @@ class FastIntersectorTest : public Test {      precomputation = make_shared<MockPrecomputation>();      EXPECT_CALL(*precomputation, GetCollocations()) -        .WillRepeatedly(Return(collocations)); +        .WillRepeatedly(ReturnRef(collocations));      phrase_builder = make_shared<PhraseBuilder>(vocabulary);      intersector = make_shared<FastIntersector>(suffix_array, precomputation,                                                 vocabulary, 15, 1);    } -  Collocations collocations; +  Index collocations;    shared_ptr<MockDataArray> data_array;    shared_ptr<MockSuffixArray> suffix_array;    shared_ptr<MockPrecomputation> precomputation; @@ -82,9 +82,9 @@ TEST_F(FastIntersectorTest, TestCachedCollocation) {    Phrase phrase = phrase_builder->Build(symbols);    PhraseLocation prefix_location(15, 16), suffix_location(16, 17); -  collocations.push_back(make_pair(symbols, expected_location)); +  collocations[symbols] = expected_location;    EXPECT_CALL(*precomputation, GetCollocations()) -      .WillRepeatedly(Return(collocations)); +      .WillRepeatedly(ReturnRef(collocations));    intersector = make_shared<FastIntersector>(suffix_array, precomputation,                                               vocabulary, 15, 1); diff --git a/extractor/mocks/mock_precomputation.h b/extractor/mocks/mock_precomputation.h index 86f4ce27..8753343e 100644 --- a/extractor/mocks/mock_precomputation.h +++ b/extractor/mocks/mock_precomputation.h @@ -6,7 +6,7 @@ namespace extractor {  class MockPrecomputation : public Precomputation {   public: -  MOCK_CONST_METHOD0(GetCollocations, Collocations()); +  MOCK_CONST_METHOD0(GetCollocations, const Index&());  };  } // namespace extractor diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc index 37dbf7b7..3b8aed69 100644 --- a/extractor/precomputation.cc +++ b/extractor/precomputation.cc @@ -14,65 +14,63 @@ int Precomputation::FIRST_NONTERMINAL = -1;  int Precomputation::SECOND_NONTERMINAL = -2;  Precomputation::Precomputation( -    shared_ptr<SuffixArray> suffix_array, int num_frequent_phrases, -    int num_super_frequent_phrases, int max_rule_span, +    shared_ptr<SuffixArray> suffix_array, int num_frequent_patterns, +    int num_super_frequent_patterns, int max_rule_span,      int max_rule_symbols, int min_gap_size,      int max_frequent_phrase_len, int min_frequency) {    vector<int> data = suffix_array->GetData()->GetData(); -  vector<vector<int>> frequent_phrases = FindMostFrequentPhrases( -      suffix_array, data, num_frequent_phrases, max_frequent_phrase_len, +  vector<vector<int>> frequent_patterns = FindMostFrequentPatterns( +      suffix_array, data, num_frequent_patterns, max_frequent_phrase_len,        min_frequency);    // Construct sets containing the frequent and superfrequent contiguous    // collocations. -  unordered_set<vector<int>, VectorHash> frequent_phrases_set; -  unordered_set<vector<int>, VectorHash> super_frequent_phrases_set; -  for (size_t i = 0; i < frequent_phrases.size(); ++i) { -    frequent_phrases_set.insert(frequent_phrases[i]); -    if (i < num_super_frequent_phrases) { -      super_frequent_phrases_set.insert(frequent_phrases[i]); +  unordered_set<vector<int>, VectorHash> frequent_patterns_set; +  unordered_set<vector<int>, VectorHash> super_frequent_patterns_set; +  for (size_t i = 0; i < frequent_patterns.size(); ++i) { +    frequent_patterns_set.insert(frequent_patterns[i]); +    if (i < num_super_frequent_patterns) { +      super_frequent_patterns_set.insert(frequent_patterns[i]);      }    } -  vector<tuple<int, int, int>> locations; +  vector<tuple<int, int, int>> matchings;    for (size_t i = 0; i < data.size(); ++i) { -    // If the sentence is over, add all the discontiguous frequent phrases to -    // the list. +    // If the sentence is over, add all the discontiguous frequent patterns to +    // the index.      if (data[i] == DataArray::END_OF_LINE) { -      AddCollocations(locations, data, max_rule_span, min_gap_size, +      AddCollocations(matchings, data, max_rule_span, min_gap_size,            max_rule_symbols); -      locations.clear(); +      matchings.clear();        continue;      } -    vector<int> phrase; -    // Find all the contiguous frequent phrases starting at position i. +    vector<int> pattern; +    // Find all the contiguous frequent patterns starting at position i.      for (int j = 1; j <= max_frequent_phrase_len && i + j <= data.size(); ++j) { -      phrase.push_back(data[i + j - 1]); -      if (frequent_phrases_set.count(phrase)) { -        int is_super_frequent = super_frequent_phrases_set.count(phrase); -        locations.push_back(make_tuple(i, j, is_super_frequent)); +      pattern.push_back(data[i + j - 1]); +      if (frequent_patterns_set.count(pattern)) { +        int is_super_frequent = super_frequent_patterns_set.count(pattern); +        matchings.push_back(make_tuple(i, j, is_super_frequent));        } else { -        // If the current phrase is not frequent, any longer phrase having the -        // current phrase as prefix will not be frequent. +        // If the current pattern is not frequent, any longer pattern having the +        // current pattern as prefix will not be frequent.          break;        }      }    } - -  collocations.shrink_to_fit();  }  Precomputation::Precomputation() {}  Precomputation::~Precomputation() {} -vector<vector<int>> Precomputation::FindMostFrequentPhrases( +vector<vector<int>> Precomputation::FindMostFrequentPatterns(      shared_ptr<SuffixArray> suffix_array, const vector<int>& data, -    int num_frequent_phrases, int max_frequent_phrase_len, int min_frequency) { +    int num_frequent_patterns, int max_frequent_phrase_len, int min_frequency) {    vector<int> lcp = suffix_array->BuildLCPArray();    vector<int> run_start(max_frequent_phrase_len); -  // Find all the phrases occurring at least min_frequency times. +  // Find all the patterns occurring at least min_frequency times.    priority_queue<pair<int, pair<int, int>>> heap;    for (size_t i = 1; i < lcp.size(); ++i) {      for (int len = lcp[i]; len < max_frequent_phrase_len; ++len) { @@ -85,34 +83,34 @@ vector<vector<int>> Precomputation::FindMostFrequentPhrases(      }    } -  // Extract the most frequent phrases. -  vector<vector<int>> frequent_phrases; -  while (frequent_phrases.size() < num_frequent_phrases && !heap.empty()) { +  // Extract the most frequent patterns. +  vector<vector<int>> frequent_patterns; +  while (frequent_patterns.size() < num_frequent_patterns && !heap.empty()) {      int start = heap.top().second.first;      int len = heap.top().second.second;      heap.pop(); -    vector<int> phrase(data.begin() + start, data.begin() + start + len); -    if (find(phrase.begin(), phrase.end(), DataArray::END_OF_LINE) == -        phrase.end()) { -      frequent_phrases.push_back(phrase); +    vector<int> pattern(data.begin() + start, data.begin() + start + len); +    if (find(pattern.begin(), pattern.end(), DataArray::END_OF_LINE) == +        pattern.end()) { +      frequent_patterns.push_back(pattern);      }    } -  return frequent_phrases; +  return frequent_patterns;  }  void Precomputation::AddCollocations( -    const vector<tuple<int, int, int>>& locations, const vector<int>& data, +    const vector<tuple<int, int, int>>& matchings, const vector<int>& data,      int max_rule_span, int min_gap_size, int max_rule_symbols) { -  // Select the leftmost subphrase. -  for (size_t i = 0; i < locations.size(); ++i) { +  // Select the leftmost subpattern. +  for (size_t i = 0; i < matchings.size(); ++i) {      int start1, size1, is_super1; -    tie(start1, size1, is_super1) = locations[i]; +    tie(start1, size1, is_super1) = matchings[i]; -    // Select the second (middle) subphrase -    for (size_t j = i + 1; j < locations.size(); ++j) { +    // Select the second (middle) subpattern +    for (size_t j = i + 1; j < matchings.size(); ++j) {        int start2, size2, is_super2; -      tie(start2, size2, is_super2) = locations[j]; +      tie(start2, size2, is_super2) = matchings[j];        if (start2 - start1 >= max_rule_span) {          break;        } @@ -120,21 +118,20 @@ void Precomputation::AddCollocations(        if (start2 - start1 - size1 >= min_gap_size            && start2 + size2 - start1 <= max_rule_span            && size1 + size2 + 1 <= max_rule_symbols) { -        vector<int> collocation(data.begin() + start1, +        vector<int> pattern(data.begin() + start1,              data.begin() + start1 + size1); -        collocation.push_back(Precomputation::FIRST_NONTERMINAL); -        collocation.insert(collocation.end(), data.begin() + start2, +        pattern.push_back(Precomputation::FIRST_NONTERMINAL); +        pattern.insert(pattern.end(), data.begin() + start2,              data.begin() + start2 + size2); - -        AddCollocation(collocation, GetLocation(start1, start2)); +        AddStartPositions(collocations[pattern], start1, start2);          // Try extending the binary collocation to a ternary collocation.          if (is_super2) { -          collocation.push_back(Precomputation::SECOND_NONTERMINAL); -          // Select the rightmost subphrase. -          for (size_t k = j + 1; k < locations.size(); ++k) { +          pattern.push_back(Precomputation::SECOND_NONTERMINAL); +          // Select the rightmost subpattern. +          for (size_t k = j + 1; k < matchings.size(); ++k) {              int start3, size3, is_super3; -            tie(start3, size3, is_super3) = locations[k]; +            tie(start3, size3, is_super3) = matchings[k];              if (start3 - start1 >= max_rule_span) {                break;              } @@ -143,12 +140,10 @@ void Precomputation::AddCollocations(                  && start3 + size3 - start1 <= max_rule_span                  && size1 + size2 + size3 + 2 <= max_rule_symbols                  && (is_super1 || is_super3)) { -              collocation.insert(collocation.end(), data.begin() + start3, +              pattern.insert(pattern.end(), data.begin() + start3,                    data.begin() + start3 + size3); - -              AddCollocation(collocation, GetLocation(start1, start2, start3)); - -              collocation.erase(collocation.end() - size3); +              AddStartPositions(collocations[pattern], start1, start2, start3); +              pattern.erase(pattern.end() - size3);              }            }          } @@ -157,29 +152,20 @@ void Precomputation::AddCollocations(    }  } -vector<int> Precomputation::GetLocation(int pos1, int pos2) { -  vector<int> location; -  location.push_back(pos1); -  location.push_back(pos2); -  return location; -} - -vector<int> Precomputation::GetLocation(int pos1, int pos2, int pos3) { -  vector<int> location; -  location.push_back(pos1); -  location.push_back(pos2); -  location.push_back(pos3); -  return location; +void Precomputation::AddStartPositions( +    vector<int>& positions, int pos1, int pos2) { +  positions.push_back(pos1); +  positions.push_back(pos2);  } -void Precomputation::AddCollocation(vector<int> collocation, -                                    vector<int> location) { -  collocation.shrink_to_fit(); -  location.shrink_to_fit(); -  collocations.push_back(make_pair(collocation, location)); +void Precomputation::AddStartPositions( +    vector<int>& positions, int pos1, int pos2, int pos3) { +  positions.push_back(pos1); +  positions.push_back(pos2); +  positions.push_back(pos3);  } -Collocations Precomputation::GetCollocations() const { +const Index& Precomputation::GetCollocations() const {    return collocations;  } diff --git a/extractor/precomputation.h b/extractor/precomputation.h index 0a06349b..9f0c9424 100644 --- a/extractor/precomputation.h +++ b/extractor/precomputation.h @@ -19,18 +19,16 @@ using namespace std;  namespace extractor {  typedef boost::hash<vector<int>> VectorHash; -typedef vector<pair<vector<int>, vector<int>>> Collocations; +typedef unordered_map<vector<int>, vector<int>, VectorHash> Index;  class SuffixArray;  /** - * Data structure containing all the data needed for constructing an index with - * all the occurrences of the most frequent discontiguous collocations in the - * source data. + * Data structure wrapping an index with all the occurrences of the most + * frequent discontiguous collocations in the source data.   * - * Let a, b, c be contiguous phrases. The data structure will contain the - * locations in the source data where every collocation of the following forms - * occurs: + * Let a, b, c be contiguous collocations. The index will contain an entry for + * every collocation of the form:   * - aXb, where a and b are frequent   * - aXbXc, where a and b are super-frequent and c is frequent or   *                b and c are super-frequent and a is frequent. @@ -39,8 +37,8 @@ class Precomputation {   public:    // Constructs the index using the suffix array.    Precomputation( -      shared_ptr<SuffixArray> suffix_array, int num_frequent_phrases, -      int num_super_frequent_phrases, int max_rule_span, +      shared_ptr<SuffixArray> suffix_array, int num_frequent_patterns, +      int num_super_frequent_patterns, int max_rule_span,        int max_rule_symbols, int min_gap_size,        int max_frequent_phrase_len, int min_frequency); @@ -49,9 +47,8 @@ class Precomputation {    virtual ~Precomputation(); -  // Returns the list of the locations of the most frequent collocations in the -  // source data. -  virtual Collocations GetCollocations() const; +  // Returns a reference to the index. +  virtual const Index& GetCollocations() const;    bool operator==(const Precomputation& other) const; @@ -60,29 +57,23 @@ class Precomputation {   private:    // Finds the most frequent contiguous collocations. -  vector<vector<int>> FindMostFrequentPhrases( +  vector<vector<int>> FindMostFrequentPatterns(        shared_ptr<SuffixArray> suffix_array, const vector<int>& data, -      int num_frequent_phrases, int max_frequent_phrase_len, +      int num_frequent_patterns, int max_frequent_phrase_len,        int min_frequency);    // Given the locations of the frequent contiguous collocations in a sentence,    // it adds new entries to the index for each discontiguous collocation    // matching the criteria specified in the class description. -  void AddCollocations(const vector<std::tuple<int, int, int>>& locations, -                       const vector<int>& data, int max_rule_span, -                       int min_gap_size, int max_rule_symbols); +  void AddCollocations( +      const vector<std::tuple<int, int, int>>& matchings, const vector<int>& data, +      int max_rule_span, int min_gap_size, int max_rule_symbols); -  // Creates a vector representation for the location of a binary collocation -  // containing the starting points of each subpattern. -  vector<int> GetLocation(int pos1, int pos2); +  // Adds an occurrence of a binary collocation. +  void AddStartPositions(vector<int>& positions, int pos1, int pos2); -  // Creates a vector representation for the location of a ternary collocation -  // containing the starting points of each subpattern. -  vector<int> GetLocation(int pos1, int pos2, int pos3); - -  // Appends a collocation to the list of collocations after shrinking the -  // vectors to avoid unnecessary memory usage. -  void AddCollocation(vector<int> collocation, vector<int> location); +  // Adds an occurrence of a ternary collocation. +  void AddStartPositions(vector<int>& positions, int pos1, int pos2, int pos3);    friend class boost::serialization::access; @@ -100,13 +91,13 @@ class Precomputation {      for (size_t i = 0; i < num_entries; ++i) {        pair<vector<int>, vector<int>> entry;        ar >> entry; -      collocations.push_back(entry); +      collocations.insert(entry);      }    }    BOOST_SERIALIZATION_SPLIT_MEMBER(); -  Collocations collocations; +  Index collocations;  };  } // namespace extractor diff --git a/extractor/precomputation_test.cc b/extractor/precomputation_test.cc index c6e457fd..e81ece5d 100644 --- a/extractor/precomputation_test.cc +++ b/extractor/precomputation_test.cc @@ -38,23 +38,6 @@ class PrecomputationTest : public Test {      precomputation = Precomputation(suffix_array, 3, 3, 10, 5, 1, 4, 2);    } -  void CheckCollocation(const Collocations& collocations, -                        const vector<int>& collocation, -                        const vector<vector<int>>& locations) { -    for (auto location: locations) { -      auto item = make_pair(collocation, location); -      EXPECT_FALSE(find(collocations.begin(), collocations.end(), item) == -                   collocations.end()); -    } -  } - -  void CheckIllegalCollocation(const Collocations& collocations, -                               const vector<int>& collocation) { -    for (auto item: collocations) { -      EXPECT_FALSE(collocation == item.first); -    } -  } -    vector<int> data;    shared_ptr<MockDataArray> data_array;    shared_ptr<MockSuffixArray> suffix_array; @@ -62,71 +45,67 @@ class PrecomputationTest : public Test {  };  TEST_F(PrecomputationTest, TestCollocations) { -  Collocations collocations = precomputation.GetCollocations(); - -  EXPECT_EQ(50, collocations.size()); - -  vector<int> collocation = {2, 3, -1, 2}; -  vector<vector<int>> locations = {{1, 5}, {1, 8}, {5, 8}, {5, 11}, {8, 11}}; -  CheckCollocation(collocations, collocation, locations); - -  collocation = {2, 3, -1, 2, 3}; -  locations = {{1, 5}, {1, 8}, {5, 8}}; -  CheckCollocation(collocations, collocation, locations); - -  collocation = {2, 3, -1, 3}; -  locations = {{1, 6}, {1, 9}, {5, 9}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {3, -1, 2}; -  locations = {{2, 5}, {2, 8}, {2, 11}, {6, 8}, {6, 11}, {9, 11}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {3, -1, 3}; -  locations = {{2, 6}, {2, 9}, {6, 9}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {3, -1, 2, 3}; -  locations = {{2, 5}, {2, 8}, {6, 8}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {2, -1, 2}; -  locations = {{1, 5}, {1, 8}, {5, 8}, {5, 11}, {8, 11}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {2, -1, 2, 3}; -  locations = {{1, 5}, {1, 8}, {5, 8}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {2, -1, 3}; -  locations = {{1, 6}, {1, 9}, {5, 9}}; -  CheckCollocation(collocations, collocation, locations); - -  collocation = {2, -1, 2, -2, 2}; -  locations = {{1, 5, 8}, {5, 8, 11}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {2, -1, 2, -2, 3}; -  locations = {{1, 5, 9}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {2, -1, 3, -2, 2}; -  locations = {{1, 6, 8}, {5, 9, 11}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {2, -1, 3, -2, 3}; -  locations = {{1, 6, 9}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {3, -1, 2, -2, 2}; -  locations = {{2, 5, 8}, {2, 5, 11}, {2, 8, 11}, {6, 8, 11}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {3, -1, 2, -2, 3}; -  locations = {{2, 5, 9}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {3, -1, 3, -2, 2}; -  locations = {{2, 6, 8}, {2, 6, 11}, {2, 9, 11}, {6, 9, 11}}; -  CheckCollocation(collocations, collocation, locations); -  collocation = {3, -1, 3, -2, 3}; -  locations = {{2, 6, 9}}; -  CheckCollocation(collocations, collocation, locations); - -  // Collocation exceeds max_rule_symbols. -  collocation = {2, -1, 2, -2, 2, 3}; -  CheckIllegalCollocation(collocations, collocation); -  // Collocation contains non frequent pattern. -  collocation = {2, -1, 5}; -  CheckIllegalCollocation(collocations, collocation); +  Index collocations = precomputation.GetCollocations(); + +  vector<int> key = {2, 3, -1, 2}; +  vector<int> expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {2, 3, -1, 2, 3}; +  expected_value = {1, 5, 1, 8, 5, 8}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {2, 3, -1, 3}; +  expected_value = {1, 6, 1, 9, 5, 9}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {3, -1, 2}; +  expected_value = {2, 5, 2, 8, 2, 11, 6, 8, 6, 11, 9, 11}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {3, -1, 3}; +  expected_value = {2, 6, 2, 9, 6, 9}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {3, -1, 2, 3}; +  expected_value = {2, 5, 2, 8, 6, 8}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {2, -1, 2}; +  expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {2, -1, 2, 3}; +  expected_value = {1, 5, 1, 8, 5, 8}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {2, -1, 3}; +  expected_value = {1, 6, 1, 9, 5, 9}; +  EXPECT_EQ(expected_value, collocations[key]); + +  key = {2, -1, 2, -2, 2}; +  expected_value = {1, 5, 8, 5, 8, 11}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {2, -1, 2, -2, 3}; +  expected_value = {1, 5, 9}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {2, -1, 3, -2, 2}; +  expected_value = {1, 6, 8, 5, 9, 11}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {2, -1, 3, -2, 3}; +  expected_value = {1, 6, 9}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {3, -1, 2, -2, 2}; +  expected_value = {2, 5, 8, 2, 5, 11, 2, 8, 11, 6, 8, 11}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {3, -1, 2, -2, 3}; +  expected_value = {2, 5, 9}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {3, -1, 3, -2, 2}; +  expected_value = {2, 6, 8, 2, 6, 11, 2, 9, 11, 6, 9, 11}; +  EXPECT_EQ(expected_value, collocations[key]); +  key = {3, -1, 3, -2, 3}; +  expected_value = {2, 6, 9}; +  EXPECT_EQ(expected_value, collocations[key]); + +  // Exceeds max_rule_symbols. +  key = {2, -1, 2, -2, 2, 3}; +  EXPECT_EQ(0, collocations.count(key)); +  // Contains non frequent pattern. +  key = {2, -1, 5}; +  EXPECT_EQ(0, collocations.count(key));  }  TEST_F(PrecomputationTest, TestSerialization) {  | 
