From f528ac27dab11770f01595b043675dba2947a263 Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Sun, 24 Nov 2013 13:19:28 +0000 Subject: Reduce memory overhead for constructing the intersector. --- extractor/precomputation_test.cc | 110 +++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 32 deletions(-) (limited to 'extractor/precomputation_test.cc') diff --git a/extractor/precomputation_test.cc b/extractor/precomputation_test.cc index e81ece5d..fd85fcf8 100644 --- a/extractor/precomputation_test.cc +++ b/extractor/precomputation_test.cc @@ -9,6 +9,7 @@ #include "mocks/mock_data_array.h" #include "mocks/mock_suffix_array.h" +#include "mocks/mock_vocabulary.h" #include "precomputation.h" using namespace std; @@ -23,7 +24,31 @@ class PrecomputationTest : public Test { virtual void SetUp() { data = {4, 2, 3, 5, 7, 2, 3, 5, 2, 3, 4, 2, 1}; data_array = make_shared(); - EXPECT_CALL(*data_array, GetData()).WillRepeatedly(ReturnRef(data)); + EXPECT_CALL(*data_array, GetSize()).WillRepeatedly(Return(data.size())); + for (size_t i = 0; i < data.size(); ++i) { + EXPECT_CALL(*data_array, AtIndex(i)).WillRepeatedly(Return(data[i])); + } + vector> expected_calls = {{8, 1}, {8, 2}, {6, 1}}; + for (const auto& call: expected_calls) { + int start = call.first; + int size = call.second; + vector word_ids(data.begin() + start, data.begin() + start + size); + EXPECT_CALL(*data_array, GetWordIds(start, size)) + .WillRepeatedly(Return(word_ids)); + } + + expected_calls = {{1, 1}, {5, 1}, {8, 1}, {9, 1}, {5, 2}, + {6, 1}, {8, 2}, {1, 2}, {2, 1}, {11, 1}}; + for (const auto& call: expected_calls) { + int start = call.first; + int size = call.second; + vector words; + for (size_t j = start; j < start + size; ++j) { + words.push_back(to_string(data[j])); + } + EXPECT_CALL(*data_array, GetWords(start, size)) + .WillRepeatedly(Return(words)); + } vector suffixes{12, 8, 5, 1, 9, 6, 2, 0, 10, 7, 3, 4, 13}; vector lcp{-1, 0, 2, 3, 1, 0, 1, 2, 0, 2, 0, 1, 0, 0}; @@ -35,77 +60,98 @@ class PrecomputationTest : public Test { } EXPECT_CALL(*suffix_array, BuildLCPArray()).WillRepeatedly(Return(lcp)); - precomputation = Precomputation(suffix_array, 3, 3, 10, 5, 1, 4, 2); + vocabulary = make_shared(); + EXPECT_CALL(*vocabulary, GetTerminalIndex("2")).WillRepeatedly(Return(2)); + EXPECT_CALL(*vocabulary, GetTerminalIndex("3")).WillRepeatedly(Return(3)); + + precomputation = Precomputation(vocabulary, suffix_array, + 3, 3, 10, 5, 1, 4, 2); } vector data; shared_ptr data_array; shared_ptr suffix_array; + shared_ptr vocabulary; Precomputation precomputation; }; TEST_F(PrecomputationTest, TestCollocations) { - Index collocations = precomputation.GetCollocations(); - vector key = {2, 3, -1, 2}; vector expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, 3, -1, 2, 3}; expected_value = {1, 5, 1, 8, 5, 8}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, 3, -1, 3}; expected_value = {1, 6, 1, 9, 5, 9}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {3, -1, 2}; expected_value = {2, 5, 2, 8, 2, 11, 6, 8, 6, 11, 9, 11}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {3, -1, 3}; expected_value = {2, 6, 2, 9, 6, 9}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {3, -1, 2, 3}; expected_value = {2, 5, 2, 8, 6, 8}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, -1, 2}; expected_value = {1, 5, 1, 8, 5, 8, 5, 11, 8, 11}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, -1, 2, 3}; expected_value = {1, 5, 1, 8, 5, 8}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); key = {2, -1, 3}; expected_value = {1, 6, 1, 9, 5, 9}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); - key = {2, -1, 2, -2, 2}; + key = {2, -1, 2, -1, 2}; expected_value = {1, 5, 8, 5, 8, 11}; - EXPECT_EQ(expected_value, collocations[key]); - key = {2, -1, 2, -2, 3}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {2, -1, 2, -1, 3}; expected_value = {1, 5, 9}; - EXPECT_EQ(expected_value, collocations[key]); - key = {2, -1, 3, -2, 2}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {2, -1, 3, -1, 2}; expected_value = {1, 6, 8, 5, 9, 11}; - EXPECT_EQ(expected_value, collocations[key]); - key = {2, -1, 3, -2, 3}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {2, -1, 3, -1, 3}; expected_value = {1, 6, 9}; - EXPECT_EQ(expected_value, collocations[key]); - key = {3, -1, 2, -2, 2}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {3, -1, 2, -1, 2}; expected_value = {2, 5, 8, 2, 5, 11, 2, 8, 11, 6, 8, 11}; - EXPECT_EQ(expected_value, collocations[key]); - key = {3, -1, 2, -2, 3}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {3, -1, 2, -1, 3}; expected_value = {2, 5, 9}; - EXPECT_EQ(expected_value, collocations[key]); - key = {3, -1, 3, -2, 2}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {3, -1, 3, -1, 2}; expected_value = {2, 6, 8, 2, 6, 11, 2, 9, 11, 6, 9, 11}; - EXPECT_EQ(expected_value, collocations[key]); - key = {3, -1, 3, -2, 3}; + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); + key = {3, -1, 3, -1, 3}; expected_value = {2, 6, 9}; - EXPECT_EQ(expected_value, collocations[key]); + EXPECT_TRUE(precomputation.Contains(key)); + EXPECT_EQ(expected_value, precomputation.GetCollocations(key)); // Exceeds max_rule_symbols. - key = {2, -1, 2, -2, 2, 3}; - EXPECT_EQ(0, collocations.count(key)); + key = {2, -1, 2, -1, 2, 3}; + EXPECT_FALSE(precomputation.Contains(key)); // Contains non frequent pattern. key = {2, -1, 5}; - EXPECT_EQ(0, collocations.count(key)); + EXPECT_FALSE(precomputation.Contains(key)); } TEST_F(PrecomputationTest, TestSerialization) { -- cgit v1.2.3