summary | refs | log | tree | commit | diff
path: root/extractor
diff options
context:
space:
mode:
author: Paul Baltescu <pauldb89@gmail.com>, 2013-06-25 17:28:54 +0100
committer: Paul Baltescu <pauldb89@gmail.com>, 2013-06-25 17:28:54 +0100
commit: ac468c610193bd03e650b2b591b31ee41714cdbf (patch)
tree: e014b6136b089ee7ef3b42bdafcabfecb147952c /extractor
parent: 9a0a9582d38315fd83628112144077b35b5f1367 (diff)
Undo last commit.
Diffstat (limited to 'extractor')
-rw-r--r-- extractor/fast_intersector.cc | 11
-rw-r--r-- extractor/fast_intersector_test.cc | 8
-rw-r--r-- extractor/mocks/mock_precomputation.h | 2
-rw-r--r-- extractor/precomputation.cc | 138
-rw-r--r-- extractor/precomputation.h | 49
-rw-r--r-- extractor/precomputation_test.cc | 143
6 files changed, 152 insertions, 199 deletions
diff --git a/extractor/fast_intersector.cc b/extractor/fast_intersector.cc
index 5360c1da..a8591a72 100644
--- a/extractor/fast_intersector.cc
+++ b/extractor/fast_intersector.cc
@@ -20,13 +20,10 @@ FastIntersector::FastIntersector(shared_ptr<SuffixArray> suffix_array,
vocabulary(vocabulary),
max_rule_span(max_rule_span),
min_gap_size(min_gap_size) {
- auto precomputed_collocations = precomputation->GetCollocations();
- for (auto item: precomputed_collocations) {
- vector<int> phrase = ConvertPhrase(item.first);
- vector<int> location = item.second;
- vector<int>& phrase_collocations = collocations[phrase];
- phrase_collocations.insert(phrase_collocations.end(), location.begin(),
- location.end());
+ Index precomputed_collocations = precomputation->GetCollocations();
+ for (pair<vector<int>, vector<int>> entry: precomputed_collocations) {
+ vector<int> phrase = ConvertPhrase(entry.first);
+ collocations[phrase] = entry.second;
}
}
diff --git a/extractor/fast_intersector_test.cc b/extractor/fast_intersector_test.cc
index 2e618b63..76c3aaea 100644
--- a/extractor/fast_intersector_test.cc
+++ b/extractor/fast_intersector_test.cc
@@ -60,14 +60,14 @@ class FastIntersectorTest : public Test {
precomputation = make_shared<MockPrecomputation>();
EXPECT_CALL(*precomputation, GetCollocations())
- .WillRepeatedly(Return(collocations));
+ .WillRepeatedly(ReturnRef(collocations));
phrase_builder = make_shared<PhraseBuilder>(vocabulary);
intersector = make_shared<FastIntersector>(suffix_array, precomputation,
vocabulary, 15, 1);
}
- Collocations collocations;
+ Index collocations;
shared_ptr<MockDataArray> data_array;
shared_ptr<MockSuffixArray> suffix_array;
shared_ptr<MockPrecomputation> precomputation;
@@ -82,9 +82,9 @@ TEST_F(FastIntersectorTest, TestCachedCollocation) {
Phrase phrase = phrase_builder->Build(symbols);
PhraseLocation prefix_location(15, 16), suffix_location(16, 17);
- collocations.push_back(make_pair(symbols, expected_location));
+ collocations[symbols] = expected_location;
EXPECT_CALL(*precomputation, GetCollocations())
- .WillRepeatedly(Return(collocations));
+ .WillRepeatedly(ReturnRef(collocations));
intersector = make_shared<FastIntersector>(suffix_array, precomputation,
vocabulary, 15, 1);
diff --git a/extractor/mocks/mock_precomputation.h b/extractor/mocks/mock_precomputation.h
index 86f4ce27..8753343e 100644
--- a/extractor/mocks/mock_precomputation.h
+++ b/extractor/mocks/mock_precomputation.h
@@ -6,7 +6,7 @@ namespace extractor {
class MockPrecomputation : public Precomputation {
public:
- MOCK_CONST_METHOD0(GetCollocations, Collocations());
+ MOCK_CONST_METHOD0(GetCollocations, const Index&());
};
} // namespace extractor
diff --git a/extractor/precomputation.cc b/extractor/precomputation.cc
index 37dbf7b7..3b8aed69 100644
--- a/extractor/precomputation.cc
+++ b/extractor/precomputation.cc
@@ -14,65 +14,63 @@ int Precomputation::FIRST_NONTERMINAL = -1;
int Precomputation::SECOND_NONTERMINAL = -2;
Precomputation::Precomputation(
- shared_ptr<SuffixArray> suffix_array, int num_frequent_phrases,
- int num_super_frequent_phrases, int max_rule_span,
+ shared_ptr<SuffixArray> suffix_array, int num_frequent_patterns,
+ int num_super_frequent_patterns, int max_rule_span,
int max_rule_symbols, int min_gap_size,
int max_frequent_phrase_len, int min_frequency) {
vector<int> data = suffix_array->GetData()->GetData();
- vector<vector<int>> frequent_phrases = FindMostFrequentPhrases(
- suffix_array, data, num_frequent_phrases, max_frequent_phrase_len,
+ vector<vector<int>> frequent_patterns = FindMostFrequentPatterns(
+ suffix_array, data, num_frequent_patterns, max_frequent_phrase_len,
min_frequency);
// Construct sets containing the frequent and superfrequent contiguous
// collocations.
- unordered_set<vector<int>, VectorHash> frequent_phrases_set;
- unordered_set<vector<int>, VectorHash> super_frequent_phrases_set;
- for (size_t i = 0; i < frequent_phrases.size(); ++i) {
- frequent_phrases_set.insert(frequent_phrases[i]);
- if (i < num_super_frequent_phrases) {
- super_frequent_phrases_set.insert(frequent_phrases[i]);
+ unordered_set<vector<int>, VectorHash> frequent_patterns_set;
+ unordered_set<vector<int>, VectorHash> super_frequent_patterns_set;
+ for (size_t i = 0; i < frequent_patterns.size(); ++i) {
+ frequent_patterns_set.insert(frequent_patterns[i]);
+ if (i < num_super_frequent_patterns) {
+ super_frequent_patterns_set.insert(frequent_patterns[i]);
}
}
- vector<tuple<int, int, int>> locations;
+ vector<tuple<int, int, int>> matchings;
for (size_t i = 0; i < data.size(); ++i) {
- // If the sentence is over, add all the discontiguous frequent phrases to
- // the list.
+ // If the sentence is over, add all the discontiguous frequent patterns to
+ // the index.
if (data[i] == DataArray::END_OF_LINE) {
- AddCollocations(locations, data, max_rule_span, min_gap_size,
+ AddCollocations(matchings, data, max_rule_span, min_gap_size,
max_rule_symbols);
- locations.clear();
+ matchings.clear();
continue;
}
- vector<int> phrase;
- // Find all the contiguous frequent phrases starting at position i.
+ vector<int> pattern;
+ // Find all the contiguous frequent patterns starting at position i.
for (int j = 1; j <= max_frequent_phrase_len && i + j <= data.size(); ++j) {
- phrase.push_back(data[i + j - 1]);
- if (frequent_phrases_set.count(phrase)) {
- int is_super_frequent = super_frequent_phrases_set.count(phrase);
- locations.push_back(make_tuple(i, j, is_super_frequent));
+ pattern.push_back(data[i + j - 1]);
+ if (frequent_patterns_set.count(pattern)) {
+ int is_super_frequent = super_frequent_patterns_set.count(pattern);
+ matchings.push_back(make_tuple(i, j, is_super_frequent));
} else {
- // If the current phrase is not frequent, any longer phrase having the
- // current phrase as prefix will not be frequent.
+ // If the current pattern is not frequent, any longer pattern having the
+ // current pattern as prefix will not be frequent.
break;
}
}
}
-
- collocations.shrink_to_fit();
}
Precomputation::Precomputation() {}
Precomputation::~Precomputation() {}
-vector<vector<int>> Precomputation::FindMostFrequentPhrases(
+vector<vector<int>> Precomputation::FindMostFrequentPatterns(
shared_ptr<SuffixArray> suffix_array, const vector<int>& data,
- int num_frequent_phrases, int max_frequent_phrase_len, int min_frequency) {
+ int num_frequent_patterns, int max_frequent_phrase_len, int min_frequency) {
vector<int> lcp = suffix_array->BuildLCPArray();
vector<int> run_start(max_frequent_phrase_len);
- // Find all the phrases occurring at least min_frequency times.
+ // Find all the patterns occurring at least min_frequency times.
priority_queue<pair<int, pair<int, int>>> heap;
for (size_t i = 1; i < lcp.size(); ++i) {
for (int len = lcp[i]; len < max_frequent_phrase_len; ++len) {
@@ -85,34 +83,34 @@ vector<vector<int>> Precomputation::FindMostFrequentPhrases(
}
}
- // Extract the most frequent phrases.
- vector<vector<int>> frequent_phrases;
- while (frequent_phrases.size() < num_frequent_phrases && !heap.empty()) {
+ // Extract the most frequent patterns.
+ vector<vector<int>> frequent_patterns;
+ while (frequent_patterns.size() < num_frequent_patterns && !heap.empty()) {
int start = heap.top().second.first;
int len = heap.top().second.second;
heap.pop();
- vector<int> phrase(data.begin() + start, data.begin() + start + len);
- if (find(phrase.begin(), phrase.end(), DataArray::END_OF_LINE) ==
- phrase.end()) {
- frequent_phrases.push_back(phrase);
+ vector<int> pattern(data.begin() + start, data.begin() + start + len);
+ if (find(pattern.begin(), pattern.end(), DataArray::END_OF_LINE) ==
+ pattern.end()) {
+ frequent_patterns.push_back(pattern);
}
}
- return frequent_phrases;
+ return frequent_patterns;
}
void Precomputation::AddCollocations(
- const vector<tuple<int, int, int>>& locations, const vector<int>& data,
+ const vector<tuple<int, int, int>>& matchings, const vector<int>& data,
int max_rule_span, int min_gap_size, int max_rule_symbols) {
- // Select the leftmost subphrase.
- for (size_t i = 0; i < locations.size(); ++i) {
+ // Select the leftmost subpattern.
+ for (size_t i = 0; i < matchings.size(); ++i) {
int start1, size1, is_super1;
- tie(start1, size1, is_super1) = locations[i];
+ tie(start1, size1, is_super1) = matchings[i];
- // Select the second (middle) subphrase
- for (size_t j = i + 1; j < locations.size(); ++j) {
+ // Select the second (middle) subpattern
+ for (size_t j = i + 1; j < matchings.size(); ++j) {
int start2, size2, is_super2;
- tie(start2, size2, is_super2) = locations[j];
+ tie(start2, size2, is_super2) = matchings[j];
if (start2 - start1 >= max_rule_span) {
break;
}
@@ -120,21 +118,20 @@ void Precomputation::AddCollocations(
if (start2 - start1 - size1 >= min_gap_size
&& start2 + size2 - start1 <= max_rule_span
&& size1 + size2 + 1 <= max_rule_symbols) {
- vector<int> collocation(data.begin() + start1,
+ vector<int> pattern(data.begin() + start1,
data.begin() + start1 + size1);
- collocation.push_back(Precomputation::FIRST_NONTERMINAL);
- collocation.insert(collocation.end(), data.begin() + start2,
+ pattern.push_back(Precomputation::FIRST_NONTERMINAL);
+ pattern.insert(pattern.end(), data.begin() + start2,
data.begin() + start2 + size2);
-
- AddCollocation(collocation, GetLocation(start1, start2));
+ AddStartPositions(collocations[pattern], start1, start2);
// Try extending the binary collocation to a ternary collocation.
if (is_super2) {
- collocation.push_back(Precomputation::SECOND_NONTERMINAL);
- // Select the rightmost subphrase.
- for (size_t k = j + 1; k < locations.size(); ++k) {
+ pattern.push_back(Precomputation::SECOND_NONTERMINAL);
+ // Select the rightmost subpattern.
+ for (size_t k = j + 1; k < matchings.size(); ++k) {
int start3, size3, is_super3;
- tie(start3, size3, is_super3) = locations[k];
+ tie(start3, size3, is_super3) = matchings[k];
if (start3 - start1 >= max_rule_span) {
break;
}
@@ -143,12 +140,10 @@ void Precomputation::AddCollocations(
&& start3 + size3 - start1 <= max_rule_span
&& size1 + size2 + size3 + 2 <= max_rule_symbols
&& (is_super1 || is_super3)) {
- collocation.insert(collocation.end(), data.begin() + start3,
+ pattern.insert(pattern.end(), data.begin() + start3,
data.begin() + start3 + size3);
-
- AddCollocation(collocation, GetLocation(start1, start2, start3));
-
- collocation.erase(collocation.end() - size3);
+ AddStartPositions(collocations[pattern], start1, start2, start3);
+ pattern.erase(pattern.end() - size3);
}
}
}
@@ -157,29 +152,20 @@ void Precomputation::AddCollocations(
}
}
-vector<int> Precomputation::GetLocation(int pos1, int pos2) {
- vector<int> location;
- location.push_back(pos1);
- location.push_back(pos2);
- return location;
-}
-
-vector<int> Precomputation::GetLocation(int pos1, int pos2, int pos3) {
- vector<int> location;
- location.push_back(pos1);
- location.push_back(pos2);
- location.push_back(pos3);
- return location;
+void Precomputation::AddStartPositions(
+ vector<int>& positions, int pos1, int pos2) {
+ positions.push_back(pos1);
+ positions.push_back(pos2);
}
-void Precomputation::AddCollocation(vector<int> collocation,
- vector<int> location) {
- collocation.shrink_to_fit();
- location.shrink_to_fit();
- collocations.push_back(make_pair(collocation, location));
+void Precomputation::AddStartPositions(
+ vector<int>& positions, int pos1, int pos2, int pos3) {
+ positions.push_back(pos1);
+ positions.push_back(pos2);
+ positions.push_back(pos3);
}
-Collocations Precomputation::GetCollocations() const {
+const Index& Precomputation::GetCollocations() const {
return collocations;
}
diff --git a/extractor/precomputation.h b/extractor/precomputation.h
index 0a06349b..9f0c9424 100644
--- a/extractor/precomputation.h
+++ b/extractor/precomputation.h
@@ -19,18 +19,16 @@ using namespace std;
namespace extractor {
typedef boost::hash<vector<int>> VectorHash;
-typedef vector<pair<vector<int>, vector<int>>> Collocations;
+typedef unordered_map<vector<int>, vector<int>, VectorHash> Index;
class SuffixArray;
/**
- * Data structure containing all the data needed for constructing an index with
- * all the occurrences of the most frequent discontiguous collocations in the
- * source data.
+ * Data structure wrapping an index with all the occurrences of the most
+ * frequent discontiguous collocations in the source data.
*
- * Let a, b, c be contiguous phrases. The data structure will contain the
- * locations in the source data where every collocation of the following forms
- * occurs:
+ * Let a, b, c be contiguous collocations. The index will contain an entry for
+ * every collocation of the form:
* - aXb, where a and b are frequent
* - aXbXc, where a and b are super-frequent and c is frequent or
* b and c are super-frequent and a is frequent.
@@ -39,8 +37,8 @@ class Precomputation {
public:
// Constructs the index using the suffix array.
Precomputation(
- shared_ptr<SuffixArray> suffix_array, int num_frequent_phrases,
- int num_super_frequent_phrases, int max_rule_span,
+ shared_ptr<SuffixArray> suffix_array, int num_frequent_patterns,
+ int num_super_frequent_patterns, int max_rule_span,
int max_rule_symbols, int min_gap_size,
int max_frequent_phrase_len, int min_frequency);
@@ -49,9 +47,8 @@ class Precomputation {
virtual ~Precomputation();
- // Returns the list of the locations of the most frequent collocations in the
- // source data.
- virtual Collocations GetCollocations() const;
+ // Returns a reference to the index.
+ virtual const Index& GetCollocations() const;
bool operator==(const Precomputation& other) const;
@@ -60,29 +57,23 @@ class Precomputation {
private:
// Finds the most frequent contiguous collocations.
- vector<vector<int>> FindMostFrequentPhrases(
+ vector<vector<int>> FindMostFrequentPatterns(
shared_ptr<SuffixArray> suffix_array, const vector<int>& data,
- int num_frequent_phrases, int max_frequent_phrase_len,
+ int num_frequent_patterns, int max_frequent_phrase_len,
int min_frequency);
// Given the locations of the frequent contiguous collocations in a sentence,
// it adds new entries to the index for each discontiguous collocation
// matching the criteria specified in the class description.
- void AddCollocations(const vector<std::tuple<int, int, int>>& locations,
- const vector<int>& data, int max_rule_span,
- int min_gap_size, int max_rule_symbols);
+ void AddCollocations(
+ const vector<std::tuple<int, int, int>>& matchings, const vector<int>& data,
+ int max_rule_span, int min_gap_size, int max_rule_symbols);
- // Creates a vector representation for the location of a binary collocation
- // containing the starting points of each subpattern.
- vector<int> GetLocation(int pos1, int pos2);
+ // Adds an occurrence of a binary collocation.
+ void AddStartPositions(vector<int>& positions, int pos1, int pos2);
- // Creates a vector representation for the location of a ternary collocation
- // containing the starting points of each subpattern.
- vector<int> GetLocation(int pos1, int pos2, int pos3);
-
- // Appends a collocation to the list of collocations after shrinking the
- // vectors to avoid unnecessary memory usage.
- void AddCollocation(vector<int> collocation, vector<int> location);
+ // Adds an occurrence of a ternary collocation.
+ void AddStartPositions(vector<int>& positions, int pos1, int pos2, int pos3);
friend class boost::serialization::access;
@@ -100,13 +91,13 @@ class Precomputation {
for (size_t i = 0; i < num_entries; ++i) {
pair<vector<int>, vector<int>> entry;
ar >> entry;
- collocations.push_back(entry);
+ collocations.insert(entry);
}
}
BOOST_SERIALIZATION_SPLIT_MEMBER();
- Collocations collocations;
+ Index collocations;
};
} // namespace extractor
diff --git a/extractor/precomputation_test.cc b/extractor/precomputation_test.cc
index c6e457fd..e81ece5d 100644
--- a/extractor/precomputation_test.cc
+++ b/extractor/precomputation_test.cc
@@ -38,23 +38,6 @@ class PrecomputationTest : public Test {
precomputation = Precomputation(suffix_array, 3, 3, 10, 5, 1, 4, 2);
}
- void CheckCollocation(const Collocations& collocations,
- const vector<int>& collocation,
- const vector<vector<int>>& locations) {
- for (auto location: locations) {
- auto item = make_pair(collocation, location);
- EXPECT_FALSE(find(collocations.begin(), collocations.end(), item) ==
- collocations.end());
- }
- }
-
- void CheckIllegalCollocation(const Collocations& collocations,
- const vector<int>& collocation) {
- for (auto item: collocations) {
- EXPECT_FALSE(collocation == item.first);
- }
- }
-
vector<int> data;
shared_ptr<MockDataArray> data_array;
shared_ptr<MockSuffixArray> suffix_array;
@@ -62,71 +45,67 @@ class PrecomputationTest : public Test {
};
TEST_F(PrecomputationTest, TestCollocations) {
- Collocations collocations = precomputation.GetCollocations();
-
- EXPECT_EQ(50, collocations.size());
-
- vector<int> collocation = {2, 3, -1, 2};
- vector<vector<int>> locations = {{1, 5}, {1, 8}, {5, 8}, {5, 11}, {8, 11}};
- CheckCollocation(collocations, collocation, locations);
-
- collocation = {2, 3, -1, 2, 3};
- locations = {{1, 5}, {1, 8}, {5, 8}};
- CheckCollocation(collocations, collocation, locations);
-
- collocation = {2, 3, -1, 3};
- locations = {{1, 6}, {1, 9}, {5, 9}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {3, -1, 2};
- locations = {{2, 5}, {2, 8}, {2, 11}, {6, 8}, {6, 11}, {9, 11}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {3, -1, 3};
- locations = {{2, 6}, {2, 9}, {6, 9}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {3, -1, 2, 3};
- locations = {{2, 5}, {2, 8}, {6, 8}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {2, -1, 2};
- locations = {{1, 5}, {1, 8}, {5, 8}, {5, 11}, {8, 11}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {2, -1, 2, 3};
- locations = {{1, 5}, {1, 8}, {5, 8}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {2, -1, 3};
- locations = {{1, 6}, {1, 9}, {5, 9}};
- CheckCollocation(collocations, collocation, locations);
-
- collocation = {2, -1, 2, -2, 2};
- locations = {{1, 5, 8}, {5, 8, 11}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {2, -1, 2, -2, 3};
- locations = {{1, 5, 9}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {2, -1, 3, -2, 2};
- locations = {{1, 6, 8}, {5, 9, 11}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {2, -1, 3, -2, 3};
- locations = {{1, 6, 9}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {3, -1, 2, -2, 2};
- locations = {{2, 5, 8}, {2, 5, 11}, {2, 8, 11}, {6, 8, 11}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {3, -1, 2, -2, 3};
- locations = {{2, 5, 9}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {3, -1, 3, -2, 2};
- locations = {{2, 6, 8}, {2, 6, 11}, {2, 9, 11}, {6, 9, 11}};
- CheckCollocation(collocations, collocation, locations);
- collocation = {3, -1, 3, -2, 3};
- locations = {{2, 6, 9}};
- CheckCollocation(collocations, collocation, locations);
-
- // Collocation exceeds max_rule_symbols.
- collocation = {2, -1, 2, -2, 2, 3};
- CheckIllegalCollocation(collocations, collocation);
- // Collocation contains non frequent pattern.
- collocation = {2, -1, 5};
- CheckIllegalCollocation(collocations, collocation);
+ Index collocations = precomputation.GetCollocations();
+
+ vector<int> key = {2, 3, -1, 2};
+ vector<int> expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {2, 3, -1, 2, 3};
+ expected_value = {1, 5, 1, 8, 5, 8};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {2, 3, -1, 3};
+ expected_value = {1, 6, 1, 9, 5, 9};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {3, -1, 2};
+ expected_value = {2, 5, 2, 8, 2, 11, 6, 8, 6, 11, 9, 11};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {3, -1, 3};
+ expected_value = {2, 6, 2, 9, 6, 9};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {3, -1, 2, 3};
+ expected_value = {2, 5, 2, 8, 6, 8};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {2, -1, 2};
+ expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {2, -1, 2, 3};
+ expected_value = {1, 5, 1, 8, 5, 8};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {2, -1, 3};
+ expected_value = {1, 6, 1, 9, 5, 9};
+ EXPECT_EQ(expected_value, collocations[key]);
+
+ key = {2, -1, 2, -2, 2};
+ expected_value = {1, 5, 8, 5, 8, 11};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {2, -1, 2, -2, 3};
+ expected_value = {1, 5, 9};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {2, -1, 3, -2, 2};
+ expected_value = {1, 6, 8, 5, 9, 11};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {2, -1, 3, -2, 3};
+ expected_value = {1, 6, 9};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {3, -1, 2, -2, 2};
+ expected_value = {2, 5, 8, 2, 5, 11, 2, 8, 11, 6, 8, 11};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {3, -1, 2, -2, 3};
+ expected_value = {2, 5, 9};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {3, -1, 3, -2, 2};
+ expected_value = {2, 6, 8, 2, 6, 11, 2, 9, 11, 6, 9, 11};
+ EXPECT_EQ(expected_value, collocations[key]);
+ key = {3, -1, 3, -2, 3};
+ expected_value = {2, 6, 9};
+ EXPECT_EQ(expected_value, collocations[key]);
+
+ // Exceeds max_rule_symbols.
+ key = {2, -1, 2, -2, 2, 3};
+ EXPECT_EQ(0, collocations.count(key));
+ // Contains non frequent pattern.
+ key = {2, -1, 5};
+ EXPECT_EQ(0, collocations.count(key));
}
TEST_F(PrecomputationTest, TestSerialization) {