diff options
Diffstat (limited to 'klm/lm/vocab.cc')
-rw-r--r-- | klm/lm/vocab.cc | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/klm/lm/vocab.cc b/klm/lm/vocab.cc index ae79c727..415f8331 100644 --- a/klm/lm/vocab.cc +++ b/klm/lm/vocab.cc @@ -187,5 +187,29 @@ void ProbingVocabulary::LoadedBinary(int fd, EnumerateVocab *to) { SetSpecial(Index("<s>"), Index("</s>"), 0); } +void MissingUnknown(const Config &config) throw(SpecialWordMissingException) { + switch(config.unknown_missing) { + case Config::SILENT: + return; + case Config::COMPLAIN: + if (config.messages) *config.messages << "The ARPA file is missing <unk>. Substituting probability " << config.unknown_missing_prob << "." << std::endl; + break; + case Config::THROW_UP: + UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing <unk> and the model is configured to throw an exception."); + } +} + +void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) { + switch (config.sentence_marker_missing) { + case Config::SILENT: + return; + case Config::COMPLAIN: + if (config.messages) *config.messages << "Missing special word " << str << "; will treat it as <unk>."; + break; + case Config::THROW_UP: + UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. Run build_binary -s to disable this check."); + } +} + } // namespace ngram } // namespace lm |