summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2011-08-18 12:14:01 +0100
committerKenneth Heafield <github@kheafield.com>2011-08-18 12:14:01 +0100
commit7607b0a7873f52d6e3ea387bf88c773cbb55f8ee (patch)
tree908fd94fea8d09725bc86ec9b3752b89c78338e5 /decoder
parentd92124ccc866192e4cdc689f2b41f0324d35dd3b (diff)
KenLM update: Bhiksha's trick, simple test for lms without unk, auto-detect binary files instead of requiring them to be specified at runtime.
Diffstat (limited to 'decoder')
-rw-r--r--decoder/cdec_ff.cc5
-rw-r--r--decoder/ff_klm.cc38
-rw-r--r--decoder/ff_klm.h7
3 files changed, 41 insertions, 9 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 3451c9fb..1ef76a05 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -55,10 +55,7 @@ void register_feature_functions() {
ff_registry.Register("NgramFeatures", new FFFactory<NgramDetector>());
ff_registry.Register("RuleNgramFeatures", new FFFactory<RuleNgramFeatures>());
ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>());
- ff_registry.Register("KLanguageModel", new FFFactory<KLanguageModel<lm::ngram::ProbingModel> >());
- ff_registry.Register("KLanguageModel_Trie", new FFFactory<KLanguageModel<lm::ngram::TrieModel> >());
- ff_registry.Register("KLanguageModel_QuantTrie", new FFFactory<KLanguageModel<lm::ngram::QuantTrieModel> >());
- ff_registry.Register("KLanguageModel_Probing", new FFFactory<KLanguageModel<lm::ngram::ProbingModel> >());
+ ff_registry.Register("KLanguageModel", new KLanguageModelFactory());
ff_registry.Register("NonLatinCount", new FFFactory<NonLatinCount>);
ff_registry.Register("RuleShape", new FFFactory<RuleShapeFeatures>);
ff_registry.Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>);
diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc
index 9b7fe2d3..24dcb9c3 100644
--- a/decoder/ff_klm.cc
+++ b/decoder/ff_klm.cc
@@ -9,6 +9,7 @@
#include "stringlib.h"
#include "hg.h"
#include "tdict.h"
+#include "lm/model.hh"
#include "lm/enumerate_vocab.hh"
using namespace std;
@@ -434,8 +435,37 @@ void KLanguageModel<Model>::FinalTraversalFeatures(const void* ant_state,
features->set_value(oov_fid_, oovs);
}
-// instantiate templates
-template class KLanguageModel<lm::ngram::ProbingModel>;
-template class KLanguageModel<lm::ngram::TrieModel>;
-template class KLanguageModel<lm::ngram::QuantTrieModel>;
+template <class Model> boost::shared_ptr<FeatureFunction> CreateModel(const std::string &param) {
+ KLanguageModel<Model> *ret = new KLanguageModel<Model>(param);
+ ret->Init();
+ return boost::shared_ptr<FeatureFunction>(ret);
+}
+boost::shared_ptr<FeatureFunction> KLanguageModelFactory::Create(std::string param) const {
+ using namespace lm::ngram;
+ std::string filename, ignored_map;
+ bool ignored_markers;
+ std::string ignored_featname;
+ ParseLMArgs(param, &filename, &ignored_map, &ignored_markers, &ignored_featname);
+ ModelType m;
+ if (!RecognizeBinary(filename.c_str(), m)) m = HASH_PROBING;
+
+ switch (m) {
+ case HASH_PROBING:
+ return CreateModel<ProbingModel>(param);
+ case TRIE_SORTED:
+ return CreateModel<TrieModel>(param);
+ case ARRAY_TRIE_SORTED:
+ return CreateModel<ArrayTrieModel>(param);
+ case QUANT_TRIE_SORTED:
+ return CreateModel<QuantTrieModel>(param);
+ case QUANT_ARRAY_TRIE_SORTED:
+ return CreateModel<QuantArrayTrieModel>(param);
+ default:
+ UTIL_THROW(util::Exception, "Unrecognized kenlm binary file type " << (unsigned)m);
+ }
+}
+
+std::string KLanguageModelFactory::usage(bool params,bool verbose) const {
+ return KLanguageModel<lm::ngram::Model>::usage(params, verbose);
+}
diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h
index 5eafe8be..6efe50f6 100644
--- a/decoder/ff_klm.h
+++ b/decoder/ff_klm.h
@@ -4,8 +4,8 @@
#include <vector>
#include <string>
+#include "ff_factory.h"
#include "ff.h"
-#include "lm/model.hh"
template <class Model> struct KLanguageModelImpl;
@@ -34,4 +34,9 @@ class KLanguageModel : public FeatureFunction {
KLanguageModelImpl<Model>* pimpl_;
};
+struct KLanguageModelFactory : public FactoryBase<FeatureFunction> {
+ FP Create(std::string param) const;
+ std::string usage(bool params,bool verbose) const;
+};
+
#endif