diff options
author | Patrick Simianer <p@simianer.de> | 2013-06-24 14:40:07 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-06-24 14:40:07 +0200 |
commit | e547ab5f765c72ad326b1d3a79f26bb221364d7d (patch) | |
tree | e205609de0adce98bdf4ec4e799cd776cebe8b72 /extractor/data_array.h | |
parent | becb1347773ebaae8cab2669afe4bad048cda992 (diff) | |
parent | 5794c0109902cf19a52cc8f1799353270ed9d85d (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'extractor/data_array.h')
-rw-r--r-- | extractor/data_array.h | 38 |
1 file changed, 30 insertions, 8 deletions
```diff
diff --git a/extractor/data_array.h b/extractor/data_array.h
index 978a6931..2be6a09c 100644
--- a/extractor/data_array.h
+++ b/extractor/data_array.h
@@ -6,6 +6,10 @@
 #include <vector>
 
 #include <boost/filesystem.hpp>
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/split_member.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
 
 namespace fs = boost::filesystem;
 using namespace std;
@@ -43,6 +47,9 @@ class DataArray {
   // Reads data array from bitext file where the sentences are separated by |||.
   DataArray(const string& filename, const Side& side);
 
+  // Creates empty data array.
+  DataArray();
+
   virtual ~DataArray();
 
   // Returns a vector containing the word ids.
@@ -82,14 +89,7 @@ class DataArray {
   // Returns the number of the sentence containing the given position.
   virtual int GetSentenceId(int position) const;
 
-  // Writes data array to file in binary format.
-  void WriteBinary(const fs::path& filepath) const;
-
-  // Writes data array to file in binary format.
-  void WriteBinary(FILE* file) const;
-
- protected:
-  DataArray();
+  bool operator==(const DataArray& other) const;
 
  private:
   // Sets up specific constants.
@@ -98,6 +98,28 @@ class DataArray {
   // Constructs the data array.
   void CreateDataArray(const vector<string>& lines);
 
+  friend class boost::serialization::access;
+
+  template<class Archive> void save(Archive& ar, unsigned int) const {
+    ar << id2word;
+    ar << data;
+    ar << sentence_id;
+    ar << sentence_start;
+  }
+
+  template<class Archive> void load(Archive& ar, unsigned int) {
+    ar >> id2word;
+    for (size_t i = 0; i < id2word.size(); ++i) {
+      word2id[id2word[i]] = i;
+    }
+
+    ar >> data;
+    ar >> sentence_id;
+    ar >> sentence_start;
+  }
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER();
+
   unordered_map<string, int> word2id;
   vector<string> id2word;
   vector<int> data;
```