From 02099a01350a41a99ec400e9b29df08a01d88979 Mon Sep 17 00:00:00 2001
From: Paul Baltescu
Date: Tue, 4 Jun 2013 23:17:57 +0100
Subject: Serialize data structures.

---
 extractor/data_array.h | 38 ++++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

(limited to 'extractor/data_array.h')

(note: angle-bracketed text was lost when this page was extracted; the include
targets and template arguments below are reconstructed from how the hunks use
them and should be checked against the upstream commit)

diff --git a/extractor/data_array.h b/extractor/data_array.h
index 978a6931..2be6a09c 100644
--- a/extractor/data_array.h
+++ b/extractor/data_array.h
@@ -6,6 +6,10 @@
 #include <vector>
 
 #include <boost/filesystem.hpp>
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/split_member.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
 
 namespace fs = boost::filesystem;
 using namespace std;
@@ -43,6 +47,9 @@ class DataArray {
   // Reads data array from bitext file where the sentences are separated by |||.
   DataArray(const string& filename, const Side& side);
 
+  // Creates empty data array.
+  DataArray();
+
   virtual ~DataArray();
 
   // Returns a vector containing the word ids.
@@ -82,14 +89,7 @@ class DataArray {
   // Returns the number of the sentence containing the given position.
   virtual int GetSentenceId(int position) const;
 
-  // Writes data array to file in binary format.
-  void WriteBinary(const fs::path& filepath) const;
-
-  // Writes data array to file in binary format.
-  void WriteBinary(FILE* file) const;
-
- protected:
-  DataArray();
+  bool operator==(const DataArray& other) const;
 
  private:
   // Sets up specific constants.
@@ -98,6 +98,28 @@ class DataArray {
   // Constructs the data array.
   void CreateDataArray(const vector<string>& lines);
 
+  friend class boost::serialization::access;
+
+  template<class Archive> void save(Archive& ar, unsigned int) const {
+    ar << id2word;
+    ar << data;
+    ar << sentence_id;
+    ar << sentence_start;
+  }
+
+  template<class Archive> void load(Archive& ar, unsigned int) {
+    ar >> id2word;
+    for (size_t i = 0; i < id2word.size(); ++i) {
+      word2id[id2word[i]] = i;
+    }
+
+    ar >> data;
+    ar >> sentence_id;
+    ar >> sentence_start;
+  }
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER();
+
   unordered_map<string, int> word2id;
   vector<string> id2word;
   vector<int> data;
-- 
cgit v1.2.3