summaryrefslogtreecommitdiff
path: root/extractor/vocabulary.h
diff options
context:
space:
mode:
authorPaul Baltescu <pauldb89@gmail.com>2013-11-26 01:14:28 +0000
committerPaul Baltescu <pauldb89@gmail.com>2013-11-26 01:31:40 +0000
commit1cd86c44e1799c441cdcda2a022be0ee6e52d38c (patch)
tree226777c6b734bbe8bf9ad0d4c941df198c72f2d1 /extractor/vocabulary.h
parent6bdb362473cf0ee1c636ca0c3f4cca63d82a5573 (diff)
Serialize vocabulary.
Diffstat (limited to 'extractor/vocabulary.h')
-rw-r--r--extractor/vocabulary.h23
1 files changed, 22 insertions, 1 deletions
diff --git a/extractor/vocabulary.h b/extractor/vocabulary.h
index c8fd9411..db092e99 100644
--- a/extractor/vocabulary.h
+++ b/extractor/vocabulary.h
@@ -5,6 +5,10 @@
#include <unordered_map>
#include <vector>
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
+
using namespace std;
namespace extractor {
@@ -14,7 +18,7 @@ namespace extractor {
*
* This structure contains words located in the frequent collocations and words
* encountered during the grammar extraction time. This dictionary is
- * considerably smaller than the dictionaries in the data arrays (and so is the
+ * considerably smaller than the dictionaries in the data arrays (and so is the
* query time). Note that this is the single data structure that changes state
* and needs to have thread safe read/write operations.
*
@@ -38,7 +42,24 @@ class Vocabulary {
// Returns the word corresponding to the given word id.
virtual string GetTerminalValue(int symbol);
+ bool operator==(const Vocabulary& vocabulary) const;
+
private:
+ friend class boost::serialization::access;
+
+ template<class Archive> void save(Archive& ar, unsigned int) const {  // Boost.Serialization save hook; the unnamed version argument is unused.
+ ar << words;  // Only the word list is written; the dictionary member is not archived.
+ }
+
+ template<class Archive> void load(Archive& ar, unsigned int) {  // Boost.Serialization load hook; the unnamed version argument is unused.
+ ar >> words;  // Read the word list back from the archive.
+ for (size_t i = 0; i < words.size(); ++i) {
+ dictionary[words[i]] = i;  // Re-create the word -> index mapping, which save() does not write out.
+ }  // NOTE(review): dictionary is not cleared before this loop, so entries present before load() would remain -- confirm load() only targets empty objects.
+ }
+
+ BOOST_SERIALIZATION_SPLIT_MEMBER();
+
unordered_map<string, int> dictionary;
vector<string> words;
};