diff options
author | Kenneth Heafield <github@kheafield.com> | 2011-08-18 12:14:01 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2011-08-18 12:14:01 +0100 |
commit | 7607b0a7873f52d6e3ea387bf88c773cbb55f8ee (patch) | |
tree | 908fd94fea8d09725bc86ec9b3752b89c78338e5 /decoder | |
parent | d92124ccc866192e4cdc689f2b41f0324d35dd3b (diff) |
KenLM update: Bhiksha's trick, simple test for lms without unk, auto-detect binary files instead of requiring them to be specified at runtime.
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/cdec_ff.cc | 5 | ||||
-rw-r--r-- | decoder/ff_klm.cc | 38 | ||||
-rw-r--r-- | decoder/ff_klm.h | 7 |
3 files changed, 41 insertions, 9 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 3451c9fb..1ef76a05 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -55,10 +55,7 @@ void register_feature_functions() { ff_registry.Register("NgramFeatures", new FFFactory<NgramDetector>()); ff_registry.Register("RuleNgramFeatures", new FFFactory<RuleNgramFeatures>()); ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>()); - ff_registry.Register("KLanguageModel", new FFFactory<KLanguageModel<lm::ngram::ProbingModel> >()); - ff_registry.Register("KLanguageModel_Trie", new FFFactory<KLanguageModel<lm::ngram::TrieModel> >()); - ff_registry.Register("KLanguageModel_QuantTrie", new FFFactory<KLanguageModel<lm::ngram::QuantTrieModel> >()); - ff_registry.Register("KLanguageModel_Probing", new FFFactory<KLanguageModel<lm::ngram::ProbingModel> >()); + ff_registry.Register("KLanguageModel", new KLanguageModelFactory()); ff_registry.Register("NonLatinCount", new FFFactory<NonLatinCount>); ff_registry.Register("RuleShape", new FFFactory<RuleShapeFeatures>); ff_registry.Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>); diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc index 9b7fe2d3..24dcb9c3 100644 --- a/decoder/ff_klm.cc +++ b/decoder/ff_klm.cc @@ -9,6 +9,7 @@ #include "stringlib.h" #include "hg.h" #include "tdict.h" +#include "lm/model.hh" #include "lm/enumerate_vocab.hh" using namespace std; @@ -434,8 +435,37 @@ void KLanguageModel<Model>::FinalTraversalFeatures(const void* ant_state, features->set_value(oov_fid_, oovs); } -// instantiate templates -template class KLanguageModel<lm::ngram::ProbingModel>; -template class KLanguageModel<lm::ngram::TrieModel>; -template class KLanguageModel<lm::ngram::QuantTrieModel>; +template <class Model> boost::shared_ptr<FeatureFunction> CreateModel(const std::string ¶m) { + KLanguageModel<Model> *ret = new KLanguageModel<Model>(param); + ret->Init(); + return boost::shared_ptr<FeatureFunction>(ret); +} +boost::shared_ptr<FeatureFunction> KLanguageModelFactory::Create(std::string param) const { + using namespace lm::ngram; + std::string filename, ignored_map; + bool ignored_markers; + std::string ignored_featname; + ParseLMArgs(param, &filename, &ignored_map, &ignored_markers, &ignored_featname); + ModelType m; + if (!RecognizeBinary(filename.c_str(), m)) m = HASH_PROBING; + + switch (m) { + case HASH_PROBING: + return CreateModel<ProbingModel>(param); + case TRIE_SORTED: + return CreateModel<TrieModel>(param); + case ARRAY_TRIE_SORTED: + return CreateModel<ArrayTrieModel>(param); + case QUANT_TRIE_SORTED: + return CreateModel<QuantTrieModel>(param); + case QUANT_ARRAY_TRIE_SORTED: + return CreateModel<QuantArrayTrieModel>(param); + default: + UTIL_THROW(util::Exception, "Unrecognized kenlm binary file type " << (unsigned)m); + } +} + +std::string KLanguageModelFactory::usage(bool params,bool verbose) const { + return KLanguageModel<lm::ngram::Model>::usage(params, verbose); +} diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h index 5eafe8be..6efe50f6 100644 --- a/decoder/ff_klm.h +++ b/decoder/ff_klm.h @@ -4,8 +4,8 @@ #include <vector> #include <string> +#include "ff_factory.h" #include "ff.h" -#include "lm/model.hh" template <class Model> struct KLanguageModelImpl; @@ -34,4 +34,9 @@ class KLanguageModel : public FeatureFunction { KLanguageModelImpl<Model>* pimpl_; }; +struct KLanguageModelFactory : public FactoryBase<FeatureFunction> { + FP Create(std::string param) const; + std::string usage(bool params,bool verbose) const; +}; + #endif |