summary | refs | log | tree | commit | diff
path: root/klm/lm/read_arpa.cc
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2011-10-24 18:17:24 +0100
committer Kenneth Heafield <github@kheafield.com> 2011-10-24 18:17:24 +0100
commit 437ee131bc4c2574768a187bf29d62dbb715abf4 (patch)
tree 6324cd6dd5fcbd987a5fd6f4aa51b92c1e9f941f /klm/lm/read_arpa.cc
parent 230ef6e8f417bf3f7e573a712c79388721e70188 (diff)
KenLM update. EnumerateVocab moved up a namespace. Fix trie building when bigrams are pruned. Make Chris feel better about MurmurHashNative.
Diffstat (limited to 'klm/lm/read_arpa.cc')
-rw-r--r-- klm/lm/read_arpa.cc | 2
1 files changed, 2 insertions, 0 deletions
diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc
index 455bc4ba..dce73f77 100644
--- a/klm/lm/read_arpa.cc
+++ b/klm/lm/read_arpa.cc
@@ -38,6 +38,8 @@ void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
}
if (static_cast<size_t>(line.size()) >= strlen(kBinaryMagic) && StringPiece(line.data(), strlen(kBinaryMagic)) == kBinaryMagic)
UTIL_THROW(FormatLoadException, "This looks like a binary file but got sent to the ARPA parser. Did you compress the binary file or pass a binary file where only ARPA files are accepted?");
+ UTIL_THROW_IF(line.size() >= 4 && StringPiece(line.data(), 4) == "blmt", FormatLoadException, "This looks like an IRSTLM binary file. Did you forget to pass --text yes to compile-lm?");
+ UTIL_THROW_IF(line == "iARPA", FormatLoadException, "This looks like an IRSTLM iARPA file. You need an ARPA file. Run\n compile-lm --text yes " << in.FileName() << " " << in.FileName() << ".arpa\nfirst.");
UTIL_THROW(FormatLoadException, "first non-empty line was \"" << line << "\" not \\data\\.");
}
while (!IsEntirelyWhiteSpace(line = in.ReadLine())) {