Merge branch 'master' of github.com:redpony/cdec

author: Chris Dyer <cdyer@cs.cmu.edu> 2011-07-05 23:19:54 -0400
committer: Chris Dyer <cdyer@cs.cmu.edu> 2011-07-05 23:19:54 -0400
commit: c3e46171f722f6276e2613ea6cb087b07325d794 (patch)
tree: 4a289539c4e7a972009dc2f1b680004b959547df /klm/lm/model.hh
parent: f91319978f6e74e5c4e5701da8fbbacb96a3161e (diff)
parent: 59932be2de387ecfcaa81a8387e8f21d5123c050 (diff)
1 file changed, 14 insertions, 7 deletions
diff --git a/klm/lm/model.hh b/klm/lm/model.hh
index b85ccdcc..1f49a382 100644
--- a/klm/lm/model.hh
+++ b/klm/lm/model.hh
@@ -5,6 +5,7 @@
 #include "lm/config.hh"
 #include "lm/facade.hh"
 #include "lm/max_order.hh"
+#include "lm/quantize.hh"
 #include "lm/search_hashed.hh"
 #include "lm/search_trie.hh"
 #include "lm/vocab.hh"
@@ -70,9 +71,10 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
   private:
     typedef base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> P;
   public:
-    // Get the size of memory that will be mapped given ngram counts.  This
-    // does not include small non-mapped control structures, such as this class
-    // itself.  
+    /* Get the size of memory that will be mapped given ngram counts.  This
+     * does not include small non-mapped control structures, such as this class
+     * itself.  
+     */
     static size_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());
 
     /* Load the model from a file.  It may be an ARPA or binary file.  Binary
@@ -111,6 +113,11 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
   private:
     friend void LoadLM<>(const char *file, const Config &config, GenericModel<Search, VocabularyT> &to);
 
+    static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config) {
+      AdvanceOrThrow(fd, VocabularyT::Size(counts[0], config));
+      Search::UpdateConfigFromBinary(fd, counts, config);
+    }
+
     float SlowBackoffLookup(const WordIndex *const context_rbegin, const WordIndex *const context_rend, unsigned char start) const;
 
     FullScoreReturn ScoreExceptBackoff(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
@@ -130,9 +137,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
     
     VocabularyT vocab_;
 
-    typedef typename Search::Unigram Unigram;
     typedef typename Search::Middle Middle;
-    typedef typename Search::Longest Longest;
 
     Search search_;
 };
@@ -141,13 +146,15 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
 
 // These must also be instantiated in the cc file.  
 typedef ::lm::ngram::ProbingVocabulary Vocabulary;
-typedef detail::GenericModel<detail::ProbingHashedSearch, Vocabulary> ProbingModel;
+typedef detail::GenericModel<detail::ProbingHashedSearch, Vocabulary> ProbingModel; // HASH_PROBING
 // Default implementation.  No real reason for it to be the default.  
 typedef ProbingModel Model;
 
 // Smaller implementation.
 typedef ::lm::ngram::SortedVocabulary SortedVocabulary;
-typedef detail::GenericModel<trie::TrieSearch, SortedVocabulary> TrieModel;
+typedef detail::GenericModel<trie::TrieSearch<DontQuantize>, SortedVocabulary> TrieModel; // TRIE_SORTED
+
+typedef detail::GenericModel<trie::TrieSearch<SeparatelyQuantize>, SortedVocabulary> QuantTrieModel; // QUANT_TRIE_SORTED
 
 } // namespace ngram
 } // namespace lm
author	Chris Dyer <cdyer@cs.cmu.edu>	2011-07-05 23:19:54 -0400
committer	Chris Dyer <cdyer@cs.cmu.edu>	2011-07-05 23:19:54 -0400
commit	c3e46171f722f6276e2613ea6cb087b07325d794 (patch)
tree	4a289539c4e7a972009dc2f1b680004b959547df /klm/lm/model.hh
parent	f91319978f6e74e5c4e5701da8fbbacb96a3161e (diff)
parent	59932be2de387ecfcaa81a8387e8f21d5123c050 (diff)