diff options
Diffstat (limited to 'extractor/suffix_array_sampler_test.cc')
-rw-r--r-- | extractor/suffix_array_sampler_test.cc | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/extractor/suffix_array_sampler_test.cc b/extractor/suffix_array_sampler_test.cc new file mode 100644 index 00000000..4b88c027 --- /dev/null +++ b/extractor/suffix_array_sampler_test.cc @@ -0,0 +1,114 @@ +#include <gtest/gtest.h> + +#include <memory> + +#include "mocks/mock_data_array.h" +#include "mocks/mock_suffix_array.h" +#include "suffix_array_sampler.h" + +using namespace std; +using namespace ::testing; + +namespace extractor { +namespace { + +class SuffixArraySamplerTest : public Test { + protected: + virtual void SetUp() { + data_array = make_shared<MockDataArray>(); + for (int i = 0; i < 10; ++i) { + EXPECT_CALL(*data_array, GetSentenceId(i)).WillRepeatedly(Return(i)); + } + + suffix_array = make_shared<MockSuffixArray>(); + EXPECT_CALL(*suffix_array, GetData()).WillRepeatedly(Return(data_array)); + for (int i = 0; i < 10; ++i) { + EXPECT_CALL(*suffix_array, GetSuffix(i)).WillRepeatedly(Return(i)); + } + } + + shared_ptr<MockDataArray> data_array; + shared_ptr<MockSuffixArray> suffix_array; +}; + +TEST_F(SuffixArraySamplerTest, TestSample) { + PhraseLocation location(0, 10); + unordered_set<int> blacklisted_sentence_ids; + + SuffixArrayRangeSampler sampler(suffix_array, 1); + vector<int> expected_locations = {0}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + sampler = SuffixArrayRangeSampler(suffix_array, 2); + expected_locations = {0, 5}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + sampler = SuffixArrayRangeSampler(suffix_array, 3); + expected_locations = {0, 3, 7}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + sampler = SuffixArrayRangeSampler(suffix_array, 4); + expected_locations = {0, 3, 5, 8}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + sampler = SuffixArrayRangeSampler(suffix_array, 100); + expected_locations = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); +} + +TEST_F(SuffixArraySamplerTest, TestBackoffSample) { + PhraseLocation location(0, 10); + + SuffixArrayRangeSampler sampler(suffix_array, 1); + unordered_set<int> blacklisted_sentence_ids = {0}; + vector<int> expected_locations = {1}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + blacklisted_sentence_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + expected_locations = {9}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + sampler = SuffixArrayRangeSampler(suffix_array, 2); + blacklisted_sentence_ids = {0, 5}; + expected_locations = {1, 4}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + blacklisted_sentence_ids = {0, 1, 2, 3}; + expected_locations = {4, 5}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + sampler = SuffixArrayRangeSampler(suffix_array, 3); + blacklisted_sentence_ids = {0, 3, 7}; + expected_locations = {1, 2, 6}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + sampler = SuffixArrayRangeSampler(suffix_array, 4); + blacklisted_sentence_ids = {0, 3, 5, 8}; + expected_locations = {1, 2, 4, 7}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + sampler = SuffixArrayRangeSampler(suffix_array, 100); + blacklisted_sentence_ids = {0}; + expected_locations = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); + + blacklisted_sentence_ids = {9}; + expected_locations = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + EXPECT_EQ(PhraseLocation(expected_locations, 1), + sampler.Sample(location, blacklisted_sentence_ids)); +} + +} +} // namespace extractor |