From 02099a01350a41a99ec400e9b29df08a01d88979 Mon Sep 17 00:00:00 2001
From: Paul Baltescu <pauldb89@gmail.com>
Date: Tue, 4 Jun 2013 23:17:57 +0100
Subject: Serialize data structures.

---
 extractor/data_array_test.cc | 91 ++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 37 deletions(-)

(limited to 'extractor/data_array_test.cc')

diff --git a/extractor/data_array_test.cc b/extractor/data_array_test.cc
index 71175fda..6c329e34 100644
--- a/extractor/data_array_test.cc
+++ b/extractor/data_array_test.cc
@@ -1,8 +1,11 @@
 #include <gtest/gtest.h>
 
 #include <memory>
+#include <sstream>
 #include <string>
 
+#include <boost/archive/binary_iarchive.hpp>
+#include <boost/archive/binary_oarchive.hpp>
 #include <boost/filesystem.hpp>
 
 #include "data_array.h"
@@ -10,6 +13,7 @@
 using namespace std;
 using namespace ::testing;
 namespace fs = boost::filesystem;
+namespace ar = boost::archive;
 
 namespace extractor {
 namespace {
@@ -18,12 +22,12 @@ class DataArrayTest : public Test {
  protected:
   virtual void SetUp() {
     string sample_test_file("sample_bitext.txt");
-    source_data = make_shared<DataArray>(sample_test_file, SOURCE);
-    target_data = make_shared<DataArray>(sample_test_file, TARGET);
+    source_data = DataArray(sample_test_file, SOURCE);  // assigned by value; the fixture member is no longer a shared_ptr
+    target_data = DataArray(sample_test_file, TARGET);  // same input file, constructed with TARGET instead of SOURCE
   }
 
-  shared_ptr<DataArray> source_data;
-  shared_ptr<DataArray> target_data;
+  DataArray source_data;
+  DataArray target_data;
 };
 
 TEST_F(DataArrayTest, TestGetData) {
@@ -32,11 +36,11 @@ TEST_F(DataArrayTest, TestGetData) {
       "ana", "are", "mere", ".", "__END_OF_LINE__",
       "ana", "bea", "mult", "lapte", ".", "__END_OF_LINE__"
   };
-  EXPECT_EQ(expected_source_data, source_data->GetData());
-  EXPECT_EQ(expected_source_data.size(), source_data->GetSize());
+  EXPECT_EQ(expected_source_data, source_data.GetData());
+  EXPECT_EQ(expected_source_data.size(), source_data.GetSize());
   for (size_t i = 0; i < expected_source_data.size(); ++i) {
-    EXPECT_EQ(expected_source_data[i], source_data->AtIndex(i));
-    EXPECT_EQ(expected_source_words[i], source_data->GetWordAtIndex(i));
+    EXPECT_EQ(expected_source_data[i], source_data.AtIndex(i));
+    EXPECT_EQ(expected_source_words[i], source_data.GetWordAtIndex(i));
   }
 
   vector<int> expected_target_data = {2, 3, 4, 5, 1, 2, 6, 7, 8, 9, 10, 5, 1};
@@ -44,55 +48,68 @@ TEST_F(DataArrayTest, TestGetData) {
       "anna", "has", "apples", ".", "__END_OF_LINE__",
       "anna", "drinks", "a", "lot", "of", "milk", ".", "__END_OF_LINE__"
   };
-  EXPECT_EQ(expected_target_data, target_data->GetData());
-  EXPECT_EQ(expected_target_data.size(), target_data->GetSize());
+  EXPECT_EQ(expected_target_data, target_data.GetData());
+  EXPECT_EQ(expected_target_data.size(), target_data.GetSize());
   for (size_t i = 0; i < expected_target_data.size(); ++i) {
-    EXPECT_EQ(expected_target_data[i], target_data->AtIndex(i));
-    EXPECT_EQ(expected_target_words[i], target_data->GetWordAtIndex(i));
+    EXPECT_EQ(expected_target_data[i], target_data.AtIndex(i));
+    EXPECT_EQ(expected_target_words[i], target_data.GetWordAtIndex(i));
   }
 }
 
 TEST_F(DataArrayTest, TestVocabulary) {
-  EXPECT_EQ(9, source_data->GetVocabularySize());
-  EXPECT_TRUE(source_data->HasWord("mere"));
-  EXPECT_EQ(4, source_data->GetWordId("mere"));
-  EXPECT_EQ("mere", source_data->GetWord(4));
-  EXPECT_FALSE(source_data->HasWord("banane"));
-
-  EXPECT_EQ(11, target_data->GetVocabularySize());
-  EXPECT_TRUE(target_data->HasWord("apples"));
-  EXPECT_EQ(4, target_data->GetWordId("apples"));
-  EXPECT_EQ("apples", target_data->GetWord(4));
-  EXPECT_FALSE(target_data->HasWord("bananas"));
+  EXPECT_EQ(9, source_data.GetVocabularySize());  // source side: same expectations as before, now via '.' on a value member
+  EXPECT_TRUE(source_data.HasWord("mere"));
+  EXPECT_EQ(4, source_data.GetWordId("mere"));  // word -> id lookup ...
+  EXPECT_EQ("mere", source_data.GetWord(4));  // ... and id -> word lookup are expected to agree
+  EXPECT_FALSE(source_data.HasWord("banane"));  // the test expects this word to be reported as absent
+
+  EXPECT_EQ(11, target_data.GetVocabularySize());  // target side: mirrors the source-side checks above
+  EXPECT_TRUE(target_data.HasWord("apples"));
+  EXPECT_EQ(4, target_data.GetWordId("apples"));  // word -> id lookup ...
+  EXPECT_EQ("apples", target_data.GetWord(4));  // ... and id -> word lookup are expected to agree
+  EXPECT_FALSE(target_data.HasWord("bananas"));  // the test expects this word to be reported as absent
 }
 
 TEST_F(DataArrayTest, TestSentenceData) {
-  EXPECT_EQ(2, source_data->GetNumSentences());
-  EXPECT_EQ(0, source_data->GetSentenceStart(0));
-  EXPECT_EQ(5, source_data->GetSentenceStart(1));
-  EXPECT_EQ(11, source_data->GetSentenceStart(2));
+  EXPECT_EQ(2, source_data.GetNumSentences());
+  EXPECT_EQ(0, source_data.GetSentenceStart(0));
+  EXPECT_EQ(5, source_data.GetSentenceStart(1));
+  EXPECT_EQ(11, source_data.GetSentenceStart(2));  // index 2 == sentence count; 11 matches the number of entries in expected_source_ids below

-  EXPECT_EQ(4, source_data->GetSentenceLength(0));
-  EXPECT_EQ(5, source_data->GetSentenceLength(1));
+  EXPECT_EQ(4, source_data.GetSentenceLength(0));  // one less than the gap between starts 0 and 5 -- presumably the end-of-line marker is excluded; TODO confirm
+  EXPECT_EQ(5, source_data.GetSentenceLength(1));

   vector<int> expected_source_ids = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1};
   for (size_t i = 0; i < expected_source_ids.size(); ++i) {
-    EXPECT_EQ(expected_source_ids[i], source_data->GetSentenceId(i));
+    EXPECT_EQ(expected_source_ids[i], source_data.GetSentenceId(i));  // checks the sentence id returned for every position i
   }

-  EXPECT_EQ(2, target_data->GetNumSentences());
-  EXPECT_EQ(0, target_data->GetSentenceStart(0));
-  EXPECT_EQ(5, target_data->GetSentenceStart(1));
-  EXPECT_EQ(13, target_data->GetSentenceStart(2));
+  EXPECT_EQ(2, target_data.GetNumSentences());
+  EXPECT_EQ(0, target_data.GetSentenceStart(0));
+  EXPECT_EQ(5, target_data.GetSentenceStart(1));
+  EXPECT_EQ(13, target_data.GetSentenceStart(2));  // index 2 == sentence count; 13 matches the number of entries in expected_target_ids below

-  EXPECT_EQ(4, target_data->GetSentenceLength(0));
-  EXPECT_EQ(7, target_data->GetSentenceLength(1));
+  EXPECT_EQ(4, target_data.GetSentenceLength(0));  // one less than the gap between starts 0 and 5 -- presumably the end-of-line marker is excluded; TODO confirm
+  EXPECT_EQ(7, target_data.GetSentenceLength(1));

   vector<int> expected_target_ids = {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1};
   for (size_t i = 0; i < expected_target_ids.size(); ++i) {
-    EXPECT_EQ(expected_target_ids[i], target_data->GetSentenceId(i));
+    EXPECT_EQ(expected_target_ids[i], target_data.GetSentenceId(i));  // checks the sentence id returned for every position i
   }
 }
 
+TEST_F(DataArrayTest, TestSerialization) {  // writes both fixture arrays to a Boost binary archive and reads them back
+  stringstream stream(ios_base::binary | ios_base::out | ios_base::in);  // in-memory stream opened for both output and input
+  ar::binary_oarchive output_stream(stream, ar::no_header);  // no_header is passed on the writing side ...
+  output_stream << source_data << target_data;
+
+  DataArray source_copy, target_copy;  // default-constructed objects that receive the deserialized state
+  ar::binary_iarchive input_stream(stream, ar::no_header);  // ... and on the reading side; both archives wrap the same stream
+  input_stream >> source_copy >> target_copy;  // extracted in the same order they were inserted
+
+  EXPECT_EQ(source_data, source_copy);  // compares whole DataArray objects, so DataArray must be equality-comparable
+  EXPECT_EQ(target_data, target_copy);
+}
+
 } // namespace
 } // namespace extractor
-- 
cgit v1.2.3