summary | refs | log | tree | commit | diff
path: root/klm/lm/vocab.hh
diff options
context:
space:
mode:
Diffstat (limited to 'klm/lm/vocab.hh')
-rw-r--r-- klm/lm/vocab.hh 10
1 files changed, 10 insertions, 0 deletions
diff --git a/klm/lm/vocab.hh b/klm/lm/vocab.hh
index b584c82f..546c1649 100644
--- a/klm/lm/vocab.hh
+++ b/klm/lm/vocab.hh
@@ -2,6 +2,7 @@
#define LM_VOCAB__
#include "lm/enumerate_vocab.hh"
+#include "lm/lm_exception.hh"
#include "lm/virtual_interface.hh"
#include "util/key_value_packing.hh"
#include "util/probing_hash_table.hh"
@@ -134,6 +135,15 @@ class ProbingVocabulary : public base::Vocabulary {
EnumerateVocab *enumerate_;
};
+void MissingUnknown(const Config &config) throw(SpecialWordMissingException); // Declaration only (definition not visible here); CheckSpecials calls this when vocab.SawUnk() is false. May throw SpecialWordMissingException.
+void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException); // Declaration only (definition not visible here); CheckSpecials calls this with str "<s>" or "</s>" when that marker equals vocab.NotFound(). May throw SpecialWordMissingException.
+
+// Checks that a vocabulary contains the special words: the unknown word,
+// the begin-of-sentence marker "<s>" and the end-of-sentence marker "</s>".
+//
+// Vocab must provide SawUnk(), BeginSentence(), EndSentence() and NotFound().
+// Each missing item is reported through MissingUnknown / MissingSentenceMarker,
+// which are declared as possibly throwing SpecialWordMissingException. If a
+// reporter returns normally, the remaining checks still run, in this order.
+template <class Vocab>
+void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {
+  // Unknown word.
+  if (!vocab.SawUnk()) {
+    MissingUnknown(config);
+  }
+  // Begin-of-sentence marker.
+  if (vocab.BeginSentence() == vocab.NotFound()) {
+    MissingSentenceMarker(config, "<s>");
+  }
+  // End-of-sentence marker.
+  if (vocab.EndSentence() == vocab.NotFound()) {
+    MissingSentenceMarker(config, "</s>");
+  }
+}
+
} // namespace ngram
} // namespace lm