summary refs log tree commit diff
path: root/extractor/data_array.h
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2013-06-24 14:40:07 +0200
committer Patrick Simianer <p@simianer.de> 2013-06-24 14:40:07 +0200
commit e547ab5f765c72ad326b1d3a79f26bb221364d7d (patch)
tree e205609de0adce98bdf4ec4e799cd776cebe8b72 /extractor/data_array.h
parent becb1347773ebaae8cab2669afe4bad048cda992 (diff)
parent 5794c0109902cf19a52cc8f1799353270ed9d85d (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'extractor/data_array.h')
-rw-r--r-- extractor/data_array.h 38
1 files changed, 30 insertions, 8 deletions
diff --git a/extractor/data_array.h b/extractor/data_array.h
index 978a6931..2be6a09c 100644
--- a/extractor/data_array.h
+++ b/extractor/data_array.h
@@ -6,6 +6,10 @@
#include <vector>
#include <boost/filesystem.hpp>
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/split_member.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
namespace fs = boost::filesystem;
using namespace std;
@@ -43,6 +47,9 @@ class DataArray {
// Reads data array from bitext file where the sentences are separated by |||.
DataArray(const string& filename, const Side& side);
+ // Creates an empty data array.
+ DataArray();
+
virtual ~DataArray();
// Returns a vector containing the word ids.
@@ -82,14 +89,7 @@ class DataArray {
// Returns the number of the sentence containing the given position.
virtual int GetSentenceId(int position) const;
- // Writes data array to file in binary format.
- void WriteBinary(const fs::path& filepath) const;
-
- // Writes data array to file in binary format.
- void WriteBinary(FILE* file) const;
-
- protected:
- DataArray();
+ bool operator==(const DataArray& other) const;
private:
// Sets up specific constants.
@@ -98,6 +98,28 @@ class DataArray {
// Constructs the data array.
void CreateDataArray(const vector<string>& lines);
+ friend class boost::serialization::access;
+
+ // Serialization hook used by Boost.Serialization (split save/load form).
+ // Streams id2word, data, sentence_id and sentence_start to the archive, in
+ // that order. word2id is not written.
+ template<class Archive> void save(Archive& ar, unsigned int) const {
+   ar << id2word << data << sentence_id << sentence_start;
+ }
+
+ // Deserialization hook used by Boost.Serialization (split save/load form).
+ // Reads the members in the same order save() writes them: id2word, data,
+ // sentence_id, sentence_start. word2id is not stored in the archive; it is
+ // rebuilt here as the inverse of id2word.
+ template<class Archive> void load(Archive& ar, unsigned int) {
+   ar >> id2word;
+   // Drop whatever the map held before the load so that it mirrors exactly
+   // the id2word vector that was just read (no stale word -> id entries).
+   word2id.clear();
+   for (size_t i = 0; i < id2word.size(); ++i) {
+     // word2id maps to int; make the narrowing from size_t explicit.
+     word2id[id2word[i]] = static_cast<int>(i);
+   }
+
+   ar >> data;
+   ar >> sentence_id;
+   ar >> sentence_start;
+ }
+
+ BOOST_SERIALIZATION_SPLIT_MEMBER();
+
unordered_map<string, int> word2id;
vector<string> id2word;
vector<int> data;