Merge branch 'master' of git://github.com/redpony/cdec

author: Jonathan Clark <jon.h.clark@gmail.com> 2011-03-10 06:54:59 -0500
committer: Jonathan Clark <jon.h.clark@gmail.com> 2011-03-10 06:54:59 -0500
commit: 7491d7f72bd50cbc08a28f64cc3939a310d65801 (patch)
tree: fe18c5bc6ca1e0bf18f0ca473833162ecd3a908b /decoder/ff_lm.cc
parent: 8c8ff6c6915ebc5ce30156e3f05bf8d1966ec0a1 (diff)
parent: 4f9933d668d247ea5831c3f2af0b996a94da28f7 (diff)
1 files changed, 22 insertions, 89 deletions
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index a9929253..afa36b96 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -59,8 +59,6 @@ char const* usage_verbose="-n determines the name of the feature (and its weight
 #include "fast_lexical_cast.hpp"
 
 #include "tdict.h"
-#include "Vocab.h"
-#include "Ngram.h"
 #include "hg.h"
 #include "stringlib.h"
 
@@ -80,41 +78,9 @@ string LanguageModel::usage(bool param,bool verbose) {
 }
 
 
-// NgramShare will keep track of all loaded lms and reuse them.
-//TODO: ref counting by shared_ptr?  for now, first one to load LM needs to stick around as long as all subsequent users.
-
 #include <boost/shared_ptr.hpp>
 using namespace boost;
 
-//WARNING: first person to add a pointer to ngram must keep it around until others are done using it.
-struct NgramShare
-{
-//  typedef shared_ptr<Ngram> NP;
-  typedef Ngram *NP;
-  map<string,NP> ns;
-  bool have(string const& file) const
-  {
-    return ns.find(file)!=ns.end();
-  }
-  NP get(string const& file) const
-  {
-    assert(have(file));
-    return ns.find(file)->second;
-  }
-  void set(string const& file,NP n)
-  {
-    ns[file]=n;
-  }
-  void add(string const& file,NP n)
-  {
-    assert(!have(file));
-    set(file,n);
-  }
-};
-
-//TODO: namespace or static?
-NgramShare ngs;
-
 namespace NgramCache {
   struct Cache {
     map<WordID, Cache> tree;
@@ -215,37 +181,28 @@ class LanguageModelImpl : public LanguageModelInterface {
     state_size_ = OrderToStateSize(order)-1;
     unigram=(order<=1);
     floor_ = -100;
-    kSTART = TD::ss;
-    kSTOP = TD::se;
-    kUNKNOWN = TD::unk;
-    kNONE = TD::none;
+    kSTART = TD::Convert("<s>");
+    kSTOP = TD::Convert("</s>");
+    kUNKNOWN = TD::Convert("<unk>");
+    kNONE = 0;
     kSTAR = TD::Convert("<{STAR}>");
   }
 
  public:
-  explicit LanguageModelImpl(int order) : ngram_(TD::dict_, order)
+  explicit LanguageModelImpl(int order)
   {
     init(order);
   }
 
 
-//TODO: show that unigram special case (0 state) computes what it should.
-  LanguageModelImpl(int order, const string& f, int load_order=0) :
-    ngram_(TD::dict_, load_order ? load_order : order)
-  {
-    init(order);
-    File file(f.c_str(), "r", 0);
-    assert(file);
-    cerr << "Reading " << order_ << "-gram LM from " << f << endl;
-    ngram_.read(file, false);
-  }
-
   virtual ~LanguageModelImpl() {
   }
 
-  Ngram *get_lm() // for make_lm_impl ngs sharing only.
+  //Ngram *get_lm() // for make_lm_impl ngs sharing only.
+  void *get_lm() // for make_lm_impl ngs sharing only.
   {
-    return &ngram_;
+    //return &ngram_;
+    return 0;
   }
 
 
@@ -258,17 +215,19 @@ class LanguageModelImpl : public LanguageModelInterface {
   }
 
   virtual double WordProb(WordID word, WordID const* context) {
-    return ngram_.wordProb(word, (VocabIndex*)context);
+    return -100;
+    //return ngram_.wordProb(word, (VocabIndex*)context);
   }
 
   // may be shorter than actual null-terminated length.  context must be null terminated.  len is just to save effort for subclasses that don't support contextID
   virtual int ContextSize(WordID const* context,int len) {
     unsigned ret;
-    ngram_.contextID((VocabIndex*)context,ret);
+    //ngram_.contextID((VocabIndex*)context,ret);
     return ret;
   }
   virtual double ContextBOW(WordID const* context,int shortened_len) {
-    return ngram_.contextBOW((VocabIndex*)context,shortened_len);
+    //return ngram_.contextBOW((VocabIndex*)context,shortened_len);
+    return -100;
   }
 
   inline double LookupProbForBufferContents(int i) {
@@ -457,7 +416,6 @@ public:
   }
 
  protected:
-  Ngram ngram_;
   vector<WordID> buffer_;
   int order_;
   int state_size_;
@@ -470,8 +428,7 @@ public:
   bool unigram;
 };
 
-struct ClientLMI : public LanguageModelImpl
-{
+struct ClientLMI : public LanguageModelImpl {
   ClientLMI(int order,string const& server) : LanguageModelImpl(order), client_(server)
   {}
 
@@ -489,37 +446,13 @@ protected:
   LMClient client_;
 };
 
-struct ReuseLMI : public LanguageModelImpl
-{
-  ReuseLMI(int order, Ngram *ng) : LanguageModelImpl(order), ng(ng)
-  {}
-  double WordProb(int word, WordID const* context) {
-    return ng->wordProb(word, (VocabIndex*)context);
-  }
-  virtual int ContextSize(WordID const* context, int len) {
-    unsigned ret;
-    ng->contextID((VocabIndex*)context,ret);
-    return ret;
-  }
-  virtual double ContextBOW(WordID const* context,int shortened_len) {
-    return ng->contextBOW((VocabIndex*)context,shortened_len);
-  }
-protected:
-  Ngram *ng;
-};
-
 LanguageModelImpl *make_lm_impl(int order, string const& f, int load_order)
 {
   if (f.find("lm://") == 0) {
     return new ClientLMI(order,f.substr(5));
-  } else if (load_order==0 && ngs.have(f)) {
-    cerr<<"Reusing already loaded Ngram LM: "<<f<<endl;
-    return new ReuseLMI(order,ngs.get(f));
   } else {
-    LanguageModelImpl *r=new LanguageModelImpl(order,f,load_order);
-    if (!load_order || !ngs.have(f))
-      ngs.add(f,r->get_lm());
-    return r;
+    cerr << "LanguageModel no longer supports non-remote LMs. Please use KLanguageModel!\nPlease see http://cdec-decoder.org/index.php?title=Language_model_notes\n";
+    abort();
   }
 }
 
@@ -600,12 +533,12 @@ void LanguageModelFsa::set_ngram_order(int i) {
   WordID *ss=(WordID*)start.begin();
   WordID *hs=(WordID*)h_start.begin();
   if (ctxlen_) { // avoid segfault in case of unigram lm (0 state)
-    set_end_phrase(TD::se);
+    set_end_phrase(TD::Convert("</s>"));
 // se is pretty boring in unigram case, just adds constant prob.  check that this is what we want
-    ss[0]=TD::ss; // start-sentence context (length 1)
-    hs[0]=TD::none; // empty context
+    ss[0]=TD::Convert("<s>"); // start-sentence context (length 1)
+    hs[0]=0; // empty context
     for (int i=1;i<ctxlen_;++i) {
-      ss[i]=hs[i]=TD::none; // need this so storage is initialized for hashing.
+      ss[i]=hs[i]=0; // need this so storage is initialized for hashing.
       //TODO: reevaluate whether state space comes cleared by allocator or not.
     }
   }
@@ -627,7 +560,7 @@ void LanguageModelFsa::print_state(ostream &o,void const* st) const {
   for (int i=ctxlen_;i>0;sp=true) {
     --i;
     WordID w=wst[i];
-    if (w==TD::none) continue;
+    if (w==0) continue;
     if (sp) o<<' ';
     o << TD::Convert(w);
   }
author	Jonathan Clark <jon.h.clark@gmail.com>	2011-03-10 06:54:59 -0500
committer	Jonathan Clark <jon.h.clark@gmail.com>	2011-03-10 06:54:59 -0500
commit	7491d7f72bd50cbc08a28f64cc3939a310d65801 (patch)
tree	fe18c5bc6ca1e0bf18f0ca473833162ecd3a908b /decoder/ff_lm.cc
parent	8c8ff6c6915ebc5ce30156e3f05bf8d1966ec0a1 (diff)
parent	4f9933d668d247ea5831c3f2af0b996a94da28f7 (diff)