diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-11-24 13:19:28 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-11-24 16:26:49 +0000 |
commit | 0d1f0af81ad2a368bc3216451a87111be8b9f6f5 (patch) | |
tree | 15d54f0ae8e79ffff5e06d66799855dd8bd37a63 /extractor/precomputation_test.cc | |
parent | 9cc2e002a064a2e14444669178126d1e96be8230 (diff) |
Reduce memory overhead for constructing the intersector.
Diffstat (limited to 'extractor/precomputation_test.cc')
-rw-r--r-- | extractor/precomputation_test.cc | 110 |
1 file changed, 78 insertions, 32 deletions
diff --git a/extractor/precomputation_test.cc b/extractor/precomputation_test.cc index e81ece5d..fd85fcf8 100644 --- a/extractor/precomputation_test.cc +++ b/extractor/precomputation_test.cc @@ -9,6 +9,7 @@ #include "mocks/mock_data_array.h" #include "mocks/mock_suffix_array.h" +#include "mocks/mock_vocabulary.h" #include "precomputation.h" using namespace std; @@ -23,7 +24,31 @@ class PrecomputationTest : public Test { virtual void SetUp() { data = {4, 2, 3, 5, 7, 2, 3, 5, 2, 3, 4, 2, 1}; data_array = make_shared<MockDataArray>(); - EXPECT_CALL(*data_array, GetData()).WillRepeatedly(ReturnRef(data)); + EXPECT_CALL(*data_array, GetSize()).WillRepeatedly(Return(data.size())); + for (size_t i = 0; i < data.size(); ++i) { + EXPECT_CALL(*data_array, AtIndex(i)).WillRepeatedly(Return(data[i])); + } + vector<pair<int, int>> expected_calls = {{8, 1}, {8, 2}, {6, 1}}; + for (const auto& call: expected_calls) { + int start = call.first; + int size = call.second; + vector<int> word_ids(data.begin() + start, data.begin() + start + size); + EXPECT_CALL(*data_array, GetWordIds(start, size)) + .WillRepeatedly(Return(word_ids)); + } + + expected_calls = {{1, 1}, {5, 1}, {8, 1}, {9, 1}, {5, 2}, + {6, 1}, {8, 2}, {1, 2}, {2, 1}, {11, 1}}; + for (const auto& call: expected_calls) { + int start = call.first; + int size = call.second; + vector<string> words; + for (size_t j = start; j < start + size; ++j) { + words.push_back(to_string(data[j])); + } + EXPECT_CALL(*data_array, GetWords(start, size)) + .WillRepeatedly(Return(words)); + } vector<int> suffixes{12, 8, 5, 1, 9, 6, 2, 0, 10, 7, 3, 4, 13}; vector<int> lcp{-1, 0, 2, 3, 1, 0, 1, 2, 0, 2, 0, 1, 0, 0}; @@ -35,77 +60,98 @@ class PrecomputationTest : public Test { } EXPECT_CALL(*suffix_array, BuildLCPArray()).WillRepeatedly(Return(lcp)); - precomputation = Precomputation(suffix_array, 3, 3, 10, 5, 1, 4, 2); + vocabulary = make_shared<MockVocabulary>(); + EXPECT_CALL(*vocabulary, GetTerminalIndex("2")).WillRepeatedly(Return(2)); + 
EXPECT_CALL(*vocabulary, GetTerminalIndex("3")).WillRepeatedly(Return(3)); + + precomputation = Precomputation(vocabulary, suffix_array, + 3, 3, 10, 5, 1, 4, 2); } vector<int> data; shared_ptr<MockDataArray> data_array; shared_ptr<MockSuffixArray> suffix_array; + shared_ptr<MockVocabulary> vocabulary; Precomputation precomputation; }; TEST_F(PrecomputationTest, TestCollocations) { - Index collocations = precomputation.GetCollocations(); - vector<int> key = {2, 3, -1, 2}; vector<int> expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, 3, -1, 2, 3}; expected_value = {1, 5, 1, 8, 5, 8}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, 3, -1, 3}; expected_value = {1, 6, 1, 9, 5, 9}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {3, -1, 2}; expected_value = {2, 5, 2, 8, 2, 11, 6, 8, 6, 11, 9, 11}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {3, -1, 3}; expected_value = {2, 6, 2, 9, 6, 9}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {3, -1, 2, 3}; expected_value = {2, 5, 2, 8, 6, 8}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, -1, 2}; expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, 
precomputation.GetCollocations(key)); key = {2, -1, 2, 3}; expected_value = {1, 5, 1, 8, 5, 8}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, -1, 3}; expected_value = {1, 6, 1, 9, 5, 9}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); - key = {2, -1, 2, -2, 2}; + key = {2, -1, 2, -1, 2}; expected_value = {1, 5, 8, 5, 8, 11}; - EXPECT_EQ(expected_value, collocations[key]); - key = {2, -1, 2, -2, 3}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {2, -1, 2, -1, 3}; expected_value = {1, 5, 9}; - EXPECT_EQ(expected_value, collocations[key]); - key = {2, -1, 3, -2, 2}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {2, -1, 3, -1, 2}; expected_value = {1, 6, 8, 5, 9, 11}; - EXPECT_EQ(expected_value, collocations[key]); - key = {2, -1, 3, -2, 3}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {2, -1, 3, -1, 3}; expected_value = {1, 6, 9}; - EXPECT_EQ(expected_value, collocations[key]); - key = {3, -1, 2, -2, 2}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {3, -1, 2, -1, 2}; expected_value = {2, 5, 8, 2, 5, 11, 2, 8, 11, 6, 8, 11}; - EXPECT_EQ(expected_value, collocations[key]); - key = {3, -1, 2, -2, 3}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {3, -1, 2, -1, 3}; expected_value = {2, 5, 9}; - EXPECT_EQ(expected_value, collocations[key]); - key = {3, -1, 3, -2, 2}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = 
{3, -1, 3, -1, 2}; expected_value = {2, 6, 8, 2, 6, 11, 2, 9, 11, 6, 9, 11}; - EXPECT_EQ(expected_value, collocations[key]); - key = {3, -1, 3, -2, 3}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {3, -1, 3, -1, 3}; expected_value = {2, 6, 9}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); // Exceeds max_rule_symbols. - key = {2, -1, 2, -2, 2, 3}; - EXPECT_EQ(0, collocations.count(key)); + key = {2, -1, 2, -1, 2, 3}; + EXPECT_FALSE(precomputation.Contains(key)); // Contains non frequent pattern. key = {2, -1, 5}; - EXPECT_EQ(0, collocations.count(key)); + EXPECT_FALSE(precomputation.Contains(key)); } TEST_F(PrecomputationTest, TestSerialization) { |