summary | refs | log | tree | commit | diff
path: root/klm/lm/vocab.cc
diff options
context:
space:
mode:
Diffstat (limited to 'klm/lm/vocab.cc')
-rw-r--r-- klm/lm/vocab.cc 24
1 files changed, 24 insertions, 0 deletions
diff --git a/klm/lm/vocab.cc b/klm/lm/vocab.cc
index ae79c727..415f8331 100644
--- a/klm/lm/vocab.cc
+++ b/klm/lm/vocab.cc
@@ -187,5 +187,29 @@ void ProbingVocabulary::LoadedBinary(int fd, EnumerateVocab *to) {
SetSpecial(Index("<s>"), Index("</s>"), 0);
}
+// Reacts to an ARPA file that has no <unk> entry, as selected by
+// config.unknown_missing:
+//   Config::SILENT   - do nothing.
+//   Config::COMPLAIN - if config.messages is non-null, write a warning to it
+//                      that reports config.unknown_missing_prob.
+//   Config::THROW_UP - raise SpecialWordMissingException through UTIL_THROW.
+// Any other value is ignored.
+void MissingUnknown(const Config &config) throw(SpecialWordMissingException) {
+  // Fatal policy first, so the remaining code only deals with reporting.
+  if (config.unknown_missing == Config::THROW_UP) {
+    UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing <unk> and the model is configured to throw an exception.");
+  }
+  // Only COMPLAIN produces output, and only when a message stream was supplied.
+  if (config.unknown_missing == Config::COMPLAIN && config.messages) {
+    *config.messages << "The ARPA file is missing <unk>. Substituting probability " << config.unknown_missing_prob << "." << std::endl;
+  }
+}
+
+// Reacts to an ARPA file that lacks a sentence-boundary word, as selected by
+// config.sentence_marker_missing.
+//
+// config: supplies the policy (sentence_marker_missing) and the optional
+//         diagnostic stream (messages, which may be null).
+// str:    spelling of the missing word, inserted verbatim into the diagnostic
+//         or exception text.
+//
+//   Config::SILENT   - do nothing.
+//   Config::COMPLAIN - if config.messages is non-null, write a one-line warning.
+//   Config::THROW_UP - raise SpecialWordMissingException through UTIL_THROW.
+// Any other value is ignored.
+//
+// NOTE(review): the dynamic exception specification is kept because it must
+// match the declaration; such specifications were removed in C++17.
+void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) {
+  switch (config.sentence_marker_missing) {
+    case Config::SILENT:
+      return;
+    case Config::COMPLAIN:
+      // Terminate the line with std::endl, as MissingUnknown does; without it
+      // the next message written to the stream is glued onto this warning.
+      if (config.messages) *config.messages << "Missing special word " << str << "; will treat it as <unk>." << std::endl;
+      break;
+    case Config::THROW_UP:
+      UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. Run build_binary -s to disable this check.");
+  }
+}
+
} // namespace ngram
} // namespace lm