summaryrefslogtreecommitdiff
path: root/extractor/precomputation_test.cc
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-11-24 13:19:28 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-11-24 16:26:49 +0000
commitf528ac27dab11770f01595b043675dba2947a263 (patch)
tree0f340a74879772fc8a5d8c4484df27457f25f11e /extractor/precomputation_test.cc
parent79206291f78fba893fda6a61ff0ae9264d00bb82 (diff)
Reduce memory overhead for constructing the intersector.
Diffstat (limited to 'extractor/precomputation_test.cc')
-rw-r--r--extractor/precomputation_test.cc110
1 files changed, 78 insertions, 32 deletions
diff --git a/extractor/precomputation_test.cc b/extractor/precomputation_test.cc
index e81ece5d..fd85fcf8 100644
--- a/extractor/precomputation_test.cc
+++ b/extractor/precomputation_test.cc
@@ -9,6 +9,7 @@
#include "mocks/mock_data_array.h"
#include "mocks/mock_suffix_array.h"
+#include "mocks/mock_vocabulary.h"
#include "precomputation.h"
using namespace std;
@@ -23,7 +24,31 @@ class PrecomputationTest : public Test {
virtual void SetUp() {
data = {4, 2, 3, 5, 7, 2, 3, 5, 2, 3, 4, 2, 1};
data_array = make_shared<MockDataArray>();
- EXPECT_CALL(*data_array, GetData()).WillRepeatedly(ReturnRef(data));
+ EXPECT_CALL(*data_array, GetSize()).WillRepeatedly(Return(data.size()));
+ for (size_t i = 0; i < data.size(); ++i) {
+ EXPECT_CALL(*data_array, AtIndex(i)).WillRepeatedly(Return(data[i]));
+ }
+ vector<pair<int, int>> expected_calls = {{8, 1}, {8, 2}, {6, 1}};
+ for (const auto& call: expected_calls) {
+ int start = call.first;
+ int size = call.second;
+ vector<int> word_ids(data.begin() + start, data.begin() + start + size);
+ EXPECT_CALL(*data_array, GetWordIds(start, size))
+ .WillRepeatedly(Return(word_ids));
+ }
+
+ expected_calls = {{1, 1}, {5, 1}, {8, 1}, {9, 1}, {5, 2},
+ {6, 1}, {8, 2}, {1, 2}, {2, 1}, {11, 1}};
+ for (const auto& call: expected_calls) {
+ int start = call.first;
+ int size = call.second;
+ vector<string> words;
+ for (size_t j = start; j < start + size; ++j) {
+ words.push_back(to_string(data[j]));
+ }
+ EXPECT_CALL(*data_array, GetWords(start, size))
+ .WillRepeatedly(Return(words));
+ }
vector<int> suffixes{12, 8, 5, 1, 9, 6, 2, 0, 10, 7, 3, 4, 13};
vector<int> lcp{-1, 0, 2, 3, 1, 0, 1, 2, 0, 2, 0, 1, 0, 0};
@@ -35,77 +60,98 @@ class PrecomputationTest : public Test {
}
EXPECT_CALL(*suffix_array, BuildLCPArray()).WillRepeatedly(Return(lcp));
- precomputation = Precomputation(suffix_array, 3, 3, 10, 5, 1, 4, 2);
+ vocabulary = make_shared<MockVocabulary>();
+ EXPECT_CALL(*vocabulary, GetTerminalIndex("2")).WillRepeatedly(Return(2));
+ EXPECT_CALL(*vocabulary, GetTerminalIndex("3")).WillRepeatedly(Return(3));
+
+ precomputation = Precomputation(vocabulary, suffix_array,
+ 3, 3, 10, 5, 1, 4, 2);
}
vector<int> data;
shared_ptr<MockDataArray> data_array;
shared_ptr<MockSuffixArray> suffix_array;
+ shared_ptr<MockVocabulary> vocabulary;
Precomputation precomputation;
};
TEST_F(PrecomputationTest, TestCollocations) {
- Index collocations = precomputation.GetCollocations();
-
vector<int> key = {2, 3, -1, 2};
vector<int> expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
key = {2, 3, -1, 2, 3};
expected_value = {1, 5, 1, 8, 5, 8};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
key = {2, 3, -1, 3};
expected_value = {1, 6, 1, 9, 5, 9};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
key = {3, -1, 2};
expected_value = {2, 5, 2, 8, 2, 11, 6, 8, 6, 11, 9, 11};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
key = {3, -1, 3};
expected_value = {2, 6, 2, 9, 6, 9};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
key = {3, -1, 2, 3};
expected_value = {2, 5, 2, 8, 6, 8};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
key = {2, -1, 2};
expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
key = {2, -1, 2, 3};
expected_value = {1, 5, 1, 8, 5, 8};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
key = {2, -1, 3};
expected_value = {1, 6, 1, 9, 5, 9};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
- key = {2, -1, 2, -2, 2};
+ key = {2, -1, 2, -1, 2};
expected_value = {1, 5, 8, 5, 8, 11};
- EXPECT_EQ(expected_value, collocations[key]);
- key = {2, -1, 2, -2, 3};
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
+ key = {2, -1, 2, -1, 3};
expected_value = {1, 5, 9};
- EXPECT_EQ(expected_value, collocations[key]);
- key = {2, -1, 3, -2, 2};
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
+ key = {2, -1, 3, -1, 2};
expected_value = {1, 6, 8, 5, 9, 11};
- EXPECT_EQ(expected_value, collocations[key]);
- key = {2, -1, 3, -2, 3};
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
+ key = {2, -1, 3, -1, 3};
expected_value = {1, 6, 9};
- EXPECT_EQ(expected_value, collocations[key]);
- key = {3, -1, 2, -2, 2};
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
+ key = {3, -1, 2, -1, 2};
expected_value = {2, 5, 8, 2, 5, 11, 2, 8, 11, 6, 8, 11};
- EXPECT_EQ(expected_value, collocations[key]);
- key = {3, -1, 2, -2, 3};
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
+ key = {3, -1, 2, -1, 3};
expected_value = {2, 5, 9};
- EXPECT_EQ(expected_value, collocations[key]);
- key = {3, -1, 3, -2, 2};
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
+ key = {3, -1, 3, -1, 2};
expected_value = {2, 6, 8, 2, 6, 11, 2, 9, 11, 6, 9, 11};
- EXPECT_EQ(expected_value, collocations[key]);
- key = {3, -1, 3, -2, 3};
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
+ key = {3, -1, 3, -1, 3};
expected_value = {2, 6, 9};
- EXPECT_EQ(expected_value, collocations[key]);
+ EXPECT_TRUE(precomputation.Contains(key));
+ EXPECT_EQ(expected_value, precomputation.GetCollocations(key));
// Exceeds max_rule_symbols.
- key = {2, -1, 2, -2, 2, 3};
- EXPECT_EQ(0, collocations.count(key));
+ key = {2, -1, 2, -1, 2, 3};
+ EXPECT_FALSE(precomputation.Contains(key));
// Contains non frequent pattern.
key = {2, -1, 5};
- EXPECT_EQ(0, collocations.count(key));
+ EXPECT_FALSE(precomputation.Contains(key));
}
TEST_F(PrecomputationTest, TestSerialization) {