author     Kenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local>   2011-05-20 16:19:04 -0400
committer  Kenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local>   2011-05-20 16:19:04 -0400
commit     461c2670efb0968ccc6789ff0c9ca6f88ab31e80 (patch)
tree       6b344dcf320674213fd449e6a4915236ad78c29f /klm/lm/model.hh
parent     0e7b303879baf95a8167194ad7c75ef738e79f15 (diff)
kenlm update including being nicer to NFS
Diffstat (limited to 'klm/lm/model.hh')
-rw-r--r--   klm/lm/model.hh   33
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/klm/lm/model.hh b/klm/lm/model.hh
index fd9640c3..b85ccdcc 100644
--- a/klm/lm/model.hh
+++ b/klm/lm/model.hh
@@ -65,7 +65,7 @@ size_t hash_value(const State &state);
namespace detail {
// Should return the same results as SRI.
-// Why VocabularyT instead of just Vocabulary? ModelFacade defines Vocabulary.
+// ModelFacade typedefs Vocabulary so we use VocabularyT to avoid naming conflicts.
template <class Search, class VocabularyT> class GenericModel : public base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> {
private:
typedef base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> P;
@@ -75,23 +75,37 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
// itself.
static size_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());
+ /* Load the model from a file. It may be an ARPA or binary file. Binary
+ * files must have the format expected by this class or you'll get an
+ * exception. So TrieModel can only load ARPA or binary created by
+ * TrieModel. To classify binary files, call RecognizeBinary in
+ * lm/binary_format.hh.
+ */
GenericModel(const char *file, const Config &config = Config());
+ /* Score p(new_word | in_state) and incorporate new_word into out_state.
+ * Note that in_state and out_state must be different references:
+ * &in_state != &out_state.
+ */
FullScoreReturn FullScore(const State &in_state, const WordIndex new_word, State &out_state) const;
- /* Slower call without in_state. Don't use this if you can avoid it. This
- * is mostly a hack for Hieu to integrate it into Moses which sometimes
- * forgets LM state (i.e. it doesn't store it with the phrase). Sigh.
- * The context indices should be in an array.
- * If context_rbegin != context_rend then *context_rbegin is the word
- * before new_word.
+ /* Slower call without in_state. Try to remember state, but sometimes it
+ * would cost too much memory or your decoder isn't setup properly.
+ * To use this function, make an array of WordIndex containing the context
+ * vocabulary ids in reverse order. Then, pass the bounds of the array:
+ * [context_rbegin, context_rend). The new_word is not part of the context
+ * array unless you intend to repeat words.
*/
FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
/* Get the state for a context. Don't use this if you can avoid it. Use
* BeginSentenceState or EmptyContextState and extend from those. If
* you're only going to use this state to call FullScore once, use
- * FullScoreForgotState. */
+ * FullScoreForgotState.
+ * To use this function, make an array of WordIndex containing the context
+ * vocabulary ids in reverse order. Then, pass the bounds of the array:
+ * [context_rbegin, context_rend).
+ */
void GetState(const WordIndex *context_rbegin, const WordIndex *context_rend, State &out_state) const;
private:
@@ -131,9 +145,8 @@ typedef detail::GenericModel<detail::ProbingHashedSearch, Vocabulary> ProbingMod
// Default implementation. No real reason for it to be the default.
typedef ProbingModel Model;
+// Smaller implementation.
typedef ::lm::ngram::SortedVocabulary SortedVocabulary;
-typedef detail::GenericModel<detail::SortedHashedSearch, SortedVocabulary> SortedModel;
-
typedef detail::GenericModel<trie::TrieSearch, SortedVocabulary> TrieModel;
} // namespace ngram
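
The comments added by this patch pin down the calling conventions: FullScore threads a State between consecutive words (in_state and out_state must be distinct objects), while FullScoreForgotState and GetState take the context as an array of vocabulary ids in reverse order, bounded by [context_rbegin, context_rend). The following is a minimal sketch of that usage against the ProbingModel typedef; the file name, word list, and printing are illustrative and not part of the patch.

#include "lm/model.hh"

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // "example.arpa" is a placeholder; a binary file written by the same model
  // type also works, as the constructor comment in this patch explains.
  lm::ngram::Model model("example.arpa");
  const lm::ngram::Model::Vocabulary &vocab = model.GetVocabulary();

  // Usual case: thread the state through consecutive words.
  // in_state and out_state are deliberately distinct objects.
  lm::ngram::State in_state(model.BeginSentenceState()), out_state;
  const char *words[] = {"this", "is", "a", "test"};
  float total = 0.0;
  for (std::size_t i = 0; i < sizeof(words) / sizeof(*words); ++i) {
    // FullScore returns a struct whose prob field is the log10 probability.
    total += model.FullScore(in_state, vocab.Index(words[i]), out_state).prob;
    in_state = out_state;
  }
  std::cout << "sentence log10 prob: " << total << std::endl;

  // Forgot-state case: pass the context as vocabulary ids in reverse order,
  // so context[0] is the word immediately before new_word ("test").
  std::vector<lm::WordIndex> context;
  context.push_back(vocab.Index("a"));
  context.push_back(vocab.Index("is"));
  context.push_back(vocab.Index("this"));
  context.push_back(vocab.BeginSentence());
  std::cout << "p(test | reversed context) = "
            << model.FullScoreForgotState(&context[0], &context[0] + context.size(),
                                          vocab.Index("test"), out_state).prob
            << std::endl;
  return 0;
}

The reverse ordering matches how a backoff model walks the history, from the most recent word outward, which is why FullScoreForgotState and GetState both expect it.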