From 4679f78193a826201055ce7bb2e01b5ad64bf04b Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Fri, 22 Feb 2013 11:02:10 +0000 Subject: Updated unit tests for data array. --- extractor/data_array.h | 6 +++--- extractor/data_array_test.cc | 30 ++++++++++++++++++++++++++++-- python/src/sa/_sa.c | 2 +- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/extractor/data_array.h b/extractor/data_array.h index 7c120b3c..96950789 100644 --- a/extractor/data_array.h +++ b/extractor/data_array.h @@ -15,6 +15,9 @@ enum Side { TARGET }; +// TODO: This class has features for both the source and target data arrays. +// Maybe we can save some memory by having more specific implementations (e.g. +// sentence_id is only needed for the source data array). class DataArray { public: static int NULL_WORD; @@ -48,7 +51,6 @@ class DataArray { virtual int GetSentenceStart(int position) const; - //TODO(pauldb): Add unit tests. virtual int GetSentenceLength(int sentence_id) const; virtual int GetSentenceId(int position) const; @@ -67,8 +69,6 @@ class DataArray { unordered_map word2id; vector id2word; vector data; - // TODO(pauldb): We only need sentence_id for the source language. Maybe we - // can save some memory here. vector sentence_id; vector sentence_start; }; diff --git a/extractor/data_array_test.cc b/extractor/data_array_test.cc index 772ba10e..ba5ce09e 100644 --- a/extractor/data_array_test.cc +++ b/extractor/data_array_test.cc @@ -26,18 +26,28 @@ class DataArrayTest : public Test { }; TEST_F(DataArrayTest, TestGetData) { - vector expected_source_data{2, 3, 4, 5, 1, 2, 6, 7, 8, 5, 1}; + vector expected_source_data = {2, 3, 4, 5, 1, 2, 6, 7, 8, 5, 1}; + vector expected_source_words = { + "ana", "are", "mere", ".", "__END_OF_LINE__", + "ana", "bea", "mult", "lapte", ".", "__END_OF_LINE__" + }; EXPECT_EQ(expected_source_data, source_data->GetData()); EXPECT_EQ(expected_source_data.size(), source_data->GetSize()); for (size_t i = 0; i < expected_source_data.size(); ++i) { EXPECT_EQ(expected_source_data[i], source_data->AtIndex(i)); + EXPECT_EQ(expected_source_words[i], source_data->GetWordAtIndex(i)); } - vector expected_target_data{2, 3, 4, 5, 1, 2, 6, 7, 8, 9, 10, 5, 1}; + vector expected_target_data = {2, 3, 4, 5, 1, 2, 6, 7, 8, 9, 10, 5, 1}; + vector expected_target_words = { + "anna", "has", "apples", ".", "__END_OF_LINE__", + "anna", "drinks", "a", "lot", "of", "milk", ".", "__END_OF_LINE__" + }; EXPECT_EQ(expected_target_data, target_data->GetData()); EXPECT_EQ(expected_target_data.size(), target_data->GetSize()); for (size_t i = 0; i < expected_target_data.size(); ++i) { EXPECT_EQ(expected_target_data[i], target_data->AtIndex(i)); + EXPECT_EQ(expected_target_words[i], target_data->GetWordAtIndex(i)); } } @@ -61,10 +71,26 @@ TEST_F(DataArrayTest, TestSentenceData) { EXPECT_EQ(5, source_data->GetSentenceStart(1)); EXPECT_EQ(11, source_data->GetSentenceStart(2)); + EXPECT_EQ(4, source_data->GetSentenceLength(0)); + EXPECT_EQ(5, source_data->GetSentenceLength(1)); + + vector expected_source_ids = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}; + for (size_t i = 0; i < expected_source_ids.size(); ++i) { + EXPECT_EQ(expected_source_ids[i], source_data->GetSentenceId(i)); + } + EXPECT_EQ(2, target_data->GetNumSentences()); EXPECT_EQ(0, target_data->GetSentenceStart(0)); EXPECT_EQ(5, target_data->GetSentenceStart(1)); EXPECT_EQ(13, target_data->GetSentenceStart(2)); + + EXPECT_EQ(4, target_data->GetSentenceLength(0)); + EXPECT_EQ(7, target_data->GetSentenceLength(1)); + + vector expected_target_ids = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}; + for (size_t i = 0; i < expected_target_ids.size(); ++i) { + EXPECT_EQ(expected_target_ids[i], target_data->GetSentenceId(i)); + } } } // namespace diff --git a/python/src/sa/_sa.c b/python/src/sa/_sa.c index 17957593..55a13ed3 100644 --- a/python/src/sa/_sa.c +++ b/python/src/sa/_sa.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.17.1 on Thu Feb 21 14:13:02 2013 */ +/* Generated by Cython 0.17.1 on Thu Feb 21 22:29:49 2013 */ #define PY_SSIZE_T_CLEAN #include "Python.h" -- cgit v1.2.3