1 files changed, 22 insertions, 1 deletions
diff --git a/extractor/vocabulary.h b/extractor/vocabulary.h
index c8fd9411..db092e99 100644
--- a/extractor/vocabulary.h
+++ b/extractor/vocabulary.h
@@ -5,6 +5,10 @@
 #include <unordered_map>
 #include <vector>
 
+#include <boost/serialization/serialization.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/vector.hpp>
+
 using namespace std;
 
 namespace extractor {
@@ -14,7 +18,7 @@ namespace extractor {
  *
  * This strucure contains words located in the frequent collocations and words
  * encountered during the grammar extraction time. This dictionary is
- * considerably smaller than the dictionaries in the data arrays (and so is the
+ * considerably smaller than the dictionaries in the data arrays (and so is the
  * query time). Note that this is the single data structure that changes state
  * and needs to have thread safe read/write operations.
  *
@@ -38,7 +42,24 @@ class Vocabulary {
   // Returns the word corresponding to the given word id.
   virtual string GetTerminalValue(int symbol);
 
+  bool operator==(const Vocabulary& vocabulary) const;
+
  private:
+  friend class boost::serialization::access;  // Boost calls private save/load.
+
+  template<class Archive> void save(Archive& out, unsigned /*version*/) const {
+    out << words;  // Only the word list is written; load() rebuilds dictionary.
+  }
+
+  template<class Archive> void load(Archive& ar, unsigned int) {
+    ar >> words;  // Reads the word list written by save().
+    dictionary.clear();  // Forget ids from any contents held before loading.
+    for (size_t i = 0; i < words.size(); ++i)
+      dictionary[words[i]] = static_cast<int>(i);  // A word's id is its index.
+  }
+
+  BOOST_SERIALIZATION_SPLIT_MEMBER();  // Splits serialize() into save/load.
+
   unordered_map<string, int> dictionary;
   vector<string> words;
 };