From 02099a01350a41a99ec400e9b29df08a01d88979 Mon Sep 17 00:00:00 2001 From: Paul Baltescu Date: Tue, 4 Jun 2013 23:17:57 +0100 Subject: Serialize data structures. --- extractor/data_array_test.cc | 91 ++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 37 deletions(-) (limited to 'extractor/data_array_test.cc') diff --git a/extractor/data_array_test.cc b/extractor/data_array_test.cc index 71175fda..6c329e34 100644 --- a/extractor/data_array_test.cc +++ b/extractor/data_array_test.cc @@ -1,8 +1,11 @@ #include #include +#include #include +#include +#include #include #include "data_array.h" @@ -10,6 +13,7 @@ using namespace std; using namespace ::testing; namespace fs = boost::filesystem; +namespace ar = boost::archive; namespace extractor { namespace { @@ -18,12 +22,12 @@ class DataArrayTest : public Test { protected: virtual void SetUp() { string sample_test_file("sample_bitext.txt"); - source_data = make_shared(sample_test_file, SOURCE); - target_data = make_shared(sample_test_file, TARGET); + source_data = DataArray(sample_test_file, SOURCE); + target_data = DataArray(sample_test_file, TARGET); } - shared_ptr source_data; - shared_ptr target_data; + DataArray source_data; + DataArray target_data; }; TEST_F(DataArrayTest, TestGetData) { @@ -32,11 +36,11 @@ TEST_F(DataArrayTest, TestGetData) { "ana", "are", "mere", ".", "__END_OF_LINE__", "ana", "bea", "mult", "lapte", ".", "__END_OF_LINE__" }; - EXPECT_EQ(expected_source_data, source_data->GetData()); - EXPECT_EQ(expected_source_data.size(), source_data->GetSize()); + EXPECT_EQ(expected_source_data, source_data.GetData()); + EXPECT_EQ(expected_source_data.size(), source_data.GetSize()); for (size_t i = 0; i < expected_source_data.size(); ++i) { - EXPECT_EQ(expected_source_data[i], source_data->AtIndex(i)); - EXPECT_EQ(expected_source_words[i], source_data->GetWordAtIndex(i)); + EXPECT_EQ(expected_source_data[i], source_data.AtIndex(i)); + EXPECT_EQ(expected_source_words[i], source_data.GetWordAtIndex(i)); } vector expected_target_data = {2, 3, 4, 5, 1, 2, 6, 7, 8, 9, 10, 5, 1}; @@ -44,55 +48,68 @@ TEST_F(DataArrayTest, TestGetData) { "anna", "has", "apples", ".", "__END_OF_LINE__", "anna", "drinks", "a", "lot", "of", "milk", ".", "__END_OF_LINE__" }; - EXPECT_EQ(expected_target_data, target_data->GetData()); - EXPECT_EQ(expected_target_data.size(), target_data->GetSize()); + EXPECT_EQ(expected_target_data, target_data.GetData()); + EXPECT_EQ(expected_target_data.size(), target_data.GetSize()); for (size_t i = 0; i < expected_target_data.size(); ++i) { - EXPECT_EQ(expected_target_data[i], target_data->AtIndex(i)); - EXPECT_EQ(expected_target_words[i], target_data->GetWordAtIndex(i)); + EXPECT_EQ(expected_target_data[i], target_data.AtIndex(i)); + EXPECT_EQ(expected_target_words[i], target_data.GetWordAtIndex(i)); } } TEST_F(DataArrayTest, TestVocabulary) { - EXPECT_EQ(9, source_data->GetVocabularySize()); - EXPECT_TRUE(source_data->HasWord("mere")); - EXPECT_EQ(4, source_data->GetWordId("mere")); - EXPECT_EQ("mere", source_data->GetWord(4)); - EXPECT_FALSE(source_data->HasWord("banane")); - - EXPECT_EQ(11, target_data->GetVocabularySize()); - EXPECT_TRUE(target_data->HasWord("apples")); - EXPECT_EQ(4, target_data->GetWordId("apples")); - EXPECT_EQ("apples", target_data->GetWord(4)); - EXPECT_FALSE(target_data->HasWord("bananas")); + EXPECT_EQ(9, source_data.GetVocabularySize()); + EXPECT_TRUE(source_data.HasWord("mere")); + EXPECT_EQ(4, source_data.GetWordId("mere")); + EXPECT_EQ("mere", source_data.GetWord(4)); + EXPECT_FALSE(source_data.HasWord("banane")); + + EXPECT_EQ(11, target_data.GetVocabularySize()); + EXPECT_TRUE(target_data.HasWord("apples")); + EXPECT_EQ(4, target_data.GetWordId("apples")); + EXPECT_EQ("apples", target_data.GetWord(4)); + EXPECT_FALSE(target_data.HasWord("bananas")); } TEST_F(DataArrayTest, TestSentenceData) { - EXPECT_EQ(2, source_data->GetNumSentences()); - EXPECT_EQ(0, source_data->GetSentenceStart(0)); - EXPECT_EQ(5, source_data->GetSentenceStart(1)); - EXPECT_EQ(11, source_data->GetSentenceStart(2)); + EXPECT_EQ(2, source_data.GetNumSentences()); + EXPECT_EQ(0, source_data.GetSentenceStart(0)); + EXPECT_EQ(5, source_data.GetSentenceStart(1)); + EXPECT_EQ(11, source_data.GetSentenceStart(2)); - EXPECT_EQ(4, source_data->GetSentenceLength(0)); - EXPECT_EQ(5, source_data->GetSentenceLength(1)); + EXPECT_EQ(4, source_data.GetSentenceLength(0)); + EXPECT_EQ(5, source_data.GetSentenceLength(1)); vector expected_source_ids = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}; for (size_t i = 0; i < expected_source_ids.size(); ++i) { - EXPECT_EQ(expected_source_ids[i], source_data->GetSentenceId(i)); + EXPECT_EQ(expected_source_ids[i], source_data.GetSentenceId(i)); } - EXPECT_EQ(2, target_data->GetNumSentences()); - EXPECT_EQ(0, target_data->GetSentenceStart(0)); - EXPECT_EQ(5, target_data->GetSentenceStart(1)); - EXPECT_EQ(13, target_data->GetSentenceStart(2)); + EXPECT_EQ(2, target_data.GetNumSentences()); + EXPECT_EQ(0, target_data.GetSentenceStart(0)); + EXPECT_EQ(5, target_data.GetSentenceStart(1)); + EXPECT_EQ(13, target_data.GetSentenceStart(2)); - EXPECT_EQ(4, target_data->GetSentenceLength(0)); - EXPECT_EQ(7, target_data->GetSentenceLength(1)); + EXPECT_EQ(4, target_data.GetSentenceLength(0)); + EXPECT_EQ(7, target_data.GetSentenceLength(1)); vector expected_target_ids = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}; for (size_t i = 0; i < expected_target_ids.size(); ++i) { - EXPECT_EQ(expected_target_ids[i], target_data->GetSentenceId(i)); + EXPECT_EQ(expected_target_ids[i], target_data.GetSentenceId(i)); } } +TEST_F(DataArrayTest, TestSerialization) { + stringstream stream(ios_base::binary | ios_base::out | ios_base::in); + ar::binary_oarchive output_stream(stream, ar::no_header); + output_stream << source_data << target_data; + + DataArray source_copy, target_copy; + ar::binary_iarchive input_stream(stream, ar::no_header); + input_stream >> source_copy >> target_copy; + + EXPECT_EQ(source_data, source_copy); + EXPECT_EQ(target_data, target_copy); +} + } // namespace } // namespace extractor -- cgit v1.2.3