summary | refs | log | tree | commit | diff
path: root/extractor/data_array.h
diff options
context:
space:
mode:
author: Paul Baltescu <pauldb89@gmail.com>, 2013-06-04 23:17:57 +0100
committer: Paul Baltescu <pauldb89@gmail.com>, 2013-06-04 23:17:57 +0100
commit: 02099a01350a41a99ec400e9b29df08a01d88979 (patch)
tree: b5af245aa6c7e00e3d4b088c9e7e3b893d080a26 /extractor/data_array.h
parent: 891502bdd646c21954684763f322b107568a072e (diff)
Serialize data structures.
Diffstat (limited to 'extractor/data_array.h')
-rw-r--r-- extractor/data_array.h 38
1 files changed, 30 insertions, 8 deletions
diff --git a/extractor/data_array.h b/extractor/data_array.h
index 978a6931..2be6a09c 100644
--- a/extractor/data_array.h
+++ b/extractor/data_array.h
@@ -6,6 +6,10 @@
#include <vector>
#include <boost/filesystem.hpp>
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/split_member.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
namespace fs = boost::filesystem;
using namespace std;
@@ -43,6 +47,9 @@ class DataArray {
// Reads data array from bitext file where the sentences are separated by |||.
DataArray(const string& filename, const Side& side);
+ // Creates empty data array.
+ DataArray();
+
virtual ~DataArray();
// Returns a vector containing the word ids.
@@ -82,14 +89,7 @@ class DataArray {
// Returns the number of the sentence containing the given position.
virtual int GetSentenceId(int position) const;
- // Writes data array to file in binary format.
- void WriteBinary(const fs::path& filepath) const;
-
- // Writes data array to file in binary format.
- void WriteBinary(FILE* file) const;
-
- protected:
- DataArray();
+ bool operator==(const DataArray& other) const;
private:
// Sets up specific constants.
@@ -98,6 +98,28 @@ class DataArray {
// Constructs the data array.
void CreateDataArray(const vector<string>& lines);
+ friend class boost::serialization::access;
+
+ template<class Archive> void save(Archive& ar, unsigned int) const {  // Streams this array's fields into `ar`; the unnamed unsigned argument is unused.
+ ar << id2word;  // Only id2word is written for the vocabulary; word2id is not streamed here.
+ ar << data;
+ ar << sentence_id;
+ ar << sentence_start;  // load() reads these four fields back in this same order.
+ }
+
+ template<class Archive> void load(Archive& ar, unsigned int) {  // Reads the fields from `ar` in the order save() wrote them; the unnamed unsigned argument is unused.
+ ar >> id2word;
+ for (size_t i = 0; i < id2word.size(); ++i) {  // Rebuild word2id from id2word, since save() does not store it.
+ word2id[id2word[i]] = i;  // NOTE(review): word2id is not cleared first, so entries present before load() would remain -- confirm load() only targets empty arrays.
+ }
+
+ ar >> data;
+ ar >> sentence_id;
+ ar >> sentence_start;
+ }
+
+ BOOST_SERIALIZATION_SPLIT_MEMBER();  // Split-member macro (from boost/serialization/split_member.hpp): this class defines separate save() and load() members.
+
unordered_map<string, int> word2id;
vector<string> id2word;
vector<int> data;