#ifndef LM_VIRTUAL_INTERFACE__ #define LM_VIRTUAL_INTERFACE__ #include "lm/return.hh" #include "lm/word_index.hh" #include "util/string_piece.hh" #include <string> #include <string.h> namespace lm { namespace base { template <class T, class U, class V> class ModelFacade; /* Vocabulary interface. Call Index(string) and get a word index for use in * calling Model. It provides faster convenience functions for <s>, </s>, and * <unk> although you can also find these using Index. * * Some models do not load the mapping from index to string. If you need this, * check if the model Vocabulary class implements such a function and access it * directly. * * The Vocabulary object is always owned by the Model and can be retrieved from * the Model using BaseVocabulary() for this abstract interface or * GetVocabulary() for the actual implementation (in which case you'll need the * actual implementation of the Model too). */ class Vocabulary { public: virtual ~Vocabulary(); WordIndex BeginSentence() const { return begin_sentence_; } WordIndex EndSentence() const { return end_sentence_; } WordIndex NotFound() const { return not_found_; } /* Most implementations allow StringPiece lookups and need only override * Index(StringPiece). SRI requires null termination and overrides all * three methods. */ virtual WordIndex Index(const StringPiece &str) const = 0; virtual WordIndex Index(const std::string &str) const { return Index(StringPiece(str)); } virtual WordIndex Index(const char *str) const { return Index(StringPiece(str)); } protected: // Call SetSpecial afterward. Vocabulary() {} Vocabulary(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) { SetSpecial(begin_sentence, end_sentence, not_found); } void SetSpecial(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found); WordIndex begin_sentence_, end_sentence_, not_found_; private: // Disable copy constructors. They're private and undefined. // Ersatz boost::noncopyable. Vocabulary(const Vocabulary &); Vocabulary &operator=(const Vocabulary &); }; /* There are two ways to access a Model. * * * OPTION 1: Access the Model directly (e.g. lm::ngram::Model in model.hh). * * Every Model implements the scoring function: * float Score( * const Model::State &in_state, * const WordIndex new_word, * Model::State &out_state) const; * * It can also return the length of n-gram matched by the model: * FullScoreReturn FullScore( * const Model::State &in_state, * const WordIndex new_word, * Model::State &out_state) const; * * * There are also accessor functions: * const State &BeginSentenceState() const; * const State &NullContextState() const; * const Vocabulary &GetVocabulary() const; * unsigned int Order() const; * * NB: In case you're wondering why the model implementation looks like it's * missing these methods, see facade.hh. * * This is the fastest way to use a model and presents a normal State class to * be included in a hypothesis state structure. * * * OPTION 2: Use the virtual interface below. * * The virtual interface allow you to decide which Model to use at runtime * without templatizing everything on the Model type. However, each Model has * its own State class, so a single State cannot be efficiently provided (it * would require using the maximum memory of any Model's State or memory * allocation with each lookup). This means you become responsible for * allocating memory with size StateSize() and passing it to the Score or * FullScore functions provided here. * * For example, cdec has a std::string containing the entire state of a * hypothesis. It can reserve StateSize bytes in this string for the model * state. * * All the State objects are POD, so it's ok to use raw memory for storing * State. * in_state and out_state must not have the same address. */ class Model { public: virtual ~Model(); size_t StateSize() const { return state_size_; } const void *BeginSentenceMemory() const { return begin_sentence_memory_; } void BeginSentenceWrite(void *to) const { memcpy(to, begin_sentence_memory_, StateSize()); } const void *NullContextMemory() const { return null_context_memory_; } void NullContextWrite(void *to) const { memcpy(to, null_context_memory_, StateSize()); } // Requires in_state != out_state virtual float BaseScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0; // Requires in_state != out_state virtual FullScoreReturn BaseFullScore(const void *in_state, const WordIndex new_word, void *out_state) const = 0; // Prefer to use FullScore. The context words should be provided in reverse order. virtual FullScoreReturn BaseFullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, void *out_state) const = 0; unsigned char Order() const { return order_; } const Vocabulary &BaseVocabulary() const { return *base_vocab_; } private: template <class T, class U, class V> friend class ModelFacade; explicit Model(size_t state_size) : state_size_(state_size) {} const size_t state_size_; const void *begin_sentence_memory_, *null_context_memory_; const Vocabulary *base_vocab_; unsigned char order_; // Disable copy constructors. They're private and undefined. // Ersatz boost::noncopyable. Model(const Model &); Model &operator=(const Model &); }; } // mamespace base } // namespace lm #endif // LM_VIRTUAL_INTERFACE__