From d3e2ec203a5cf550320caa8023ac3dd103b0be7d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 13 Oct 2014 00:42:37 -0400 Subject: new kenlm --- klm/lm/wrappers/nplm.hh | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 klm/lm/wrappers/nplm.hh (limited to 'klm/lm/wrappers/nplm.hh') diff --git a/klm/lm/wrappers/nplm.hh b/klm/lm/wrappers/nplm.hh new file mode 100644 index 00000000..b7dd4a21 --- /dev/null +++ b/klm/lm/wrappers/nplm.hh @@ -0,0 +1,83 @@ +#ifndef LM_WRAPPERS_NPLM_H +#define LM_WRAPPERS_NPLM_H + +#include "lm/facade.hh" +#include "lm/max_order.hh" +#include "util/string_piece.hh" + +#include +#include + +/* Wrapper to NPLM "by Ashish Vaswani, with contributions from David Chiang + * and Victoria Fossum." + * http://nlg.isi.edu/software/nplm/ + */ + +namespace nplm { +class vocabulary; +class neuralLM; +} // namespace nplm + +namespace lm { +namespace np { + +class Vocabulary : public base::Vocabulary { + public: + Vocabulary(const nplm::vocabulary &vocab); + + ~Vocabulary(); + + WordIndex Index(const std::string &str) const; + + // TODO: lobby them to support StringPiece + WordIndex Index(const StringPiece &str) const { + return Index(std::string(str.data(), str.size())); + } + + lm::WordIndex NullWord() const { return null_word_; } + + private: + const nplm::vocabulary &vocab_; + + const lm::WordIndex null_word_; +}; + +// Sorry for imposing my limitations on your code. +#define NPLM_MAX_ORDER 7 + +struct State { + WordIndex words[NPLM_MAX_ORDER - 1]; +}; + +class Model : public lm::base::ModelFacade { + private: + typedef lm::base::ModelFacade P; + + public: + // Does this look like an NPLM? + static bool Recognize(const std::string &file); + + explicit Model(const std::string &file, std::size_t cache_size = 1 << 20); + + ~Model(); + + FullScoreReturn FullScore(const State &from, const WordIndex new_word, State &out_state) const; + + FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const; + + private: + boost::scoped_ptr base_instance_; + + mutable boost::thread_specific_ptr backend_; + + Vocabulary vocab_; + + lm::WordIndex null_word_; + + const std::size_t cache_size_; +}; + +} // namespace np +} // namespace lm + +#endif // LM_WRAPPERS_NPLM_H -- cgit v1.2.3