diff options
author | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 |
commit | e26434979adc33bd949566ba7bf02dff64e80a3e (patch) | |
tree | d1c72495e3af6301bd28e7e66c42de0c7a944d1f /gi/pf/unigrams.h | |
parent | 0870d4a1f5e14cc7daf553b180d599f09f6614a2 (diff) |
cdec cleanup, remove bayesian stuff, parsing stuff
Diffstat (limited to 'gi/pf/unigrams.h')
-rw-r--r-- | gi/pf/unigrams.h | 69 |
1 files changed, 0 insertions, 69 deletions
diff --git a/gi/pf/unigrams.h b/gi/pf/unigrams.h deleted file mode 100644 index 1660d1ed..00000000 --- a/gi/pf/unigrams.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _UNIGRAMS_H_ -#define _UNIGRAMS_H_ - -#include <vector> -#include <string> -#include <tr1/unordered_map> -#include <boost/functional.hpp> - -#include "wordid.h" -#include "prob.h" -#include "tdict.h" - -struct UnigramModel { - explicit UnigramModel(const std::string& fname, unsigned vocab_size) : - use_uniform_(fname.size() == 0), - uniform_(1.0 / vocab_size), - probs_() { - if (fname.size() > 0) { - probs_.resize(TD::NumWords() + 1); - LoadUnigrams(fname); - } - } - - const prob_t& operator()(const WordID& w) const { - assert(w); - if (use_uniform_) return uniform_; - return probs_[w]; - } - - private: - void LoadUnigrams(const std::string& fname); - - const bool use_uniform_; - const prob_t uniform_; - std::vector<prob_t> probs_; -}; - - -// reads an ARPA unigram file and converts words like 'cat' into a string 'c a t' -struct UnigramWordModel { - explicit UnigramWordModel(const std::string& fname) : - use_uniform_(false), - uniform_(1.0), - probs_() { - LoadUnigrams(fname); - } - - explicit UnigramWordModel(const unsigned vocab_size) : - use_uniform_(true), - uniform_(1.0 / vocab_size), - probs_() {} - - const prob_t& operator()(const std::vector<WordID>& s) const { - if (use_uniform_) return uniform_; - const VectorProbHash::const_iterator it = probs_.find(s); - assert(it != probs_.end()); - return it->second; - } - - private: - void LoadUnigrams(const std::string& fname); - - const bool use_uniform_; - const prob_t uniform_; - typedef std::tr1::unordered_map<std::vector<WordID>, prob_t, boost::hash<std::vector<WordID> > > VectorProbHash; - VectorProbHash probs_; -}; - -#endif |