summaryrefslogtreecommitdiff
path: root/klm/lm/read_arpa.cc
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2011-10-24 18:17:24 +0100
committerKenneth Heafield <github@kheafield.com>2011-10-24 18:17:24 +0100
commit3106cf8eca76df8b46d139b8f5ce5002200d660d (patch)
tree637930011a082de54bf21a97078cb67483ea248c /klm/lm/read_arpa.cc
parentb2171f53c6c597ac4326f63250269aa13df84718 (diff)
KenLM update. EnumerateVocab moved up a namespace. Fix trie building when bigrams are pruned. Make Chris feel better about MurmurHashNative.
Diffstat (limited to 'klm/lm/read_arpa.cc')
-rw-r--r--klm/lm/read_arpa.cc2
1 files changed, 2 insertions, 0 deletions
diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc
index 455bc4ba..dce73f77 100644
--- a/klm/lm/read_arpa.cc
+++ b/klm/lm/read_arpa.cc
@@ -38,6 +38,8 @@ void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
}
if (static_cast<size_t>(line.size()) >= strlen(kBinaryMagic) && StringPiece(line.data(), strlen(kBinaryMagic)) == kBinaryMagic)
UTIL_THROW(FormatLoadException, "This looks like a binary file but got sent to the ARPA parser. Did you compress the binary file or pass a binary file where only ARPA files are accepted?");
+ UTIL_THROW_IF(line.size() >= 4 && StringPiece(line.data(), 4) == "blmt", FormatLoadException, "This looks like an IRSTLM binary file. Did you forget to pass --text yes to compile-lm?");
+ UTIL_THROW_IF(line == "iARPA", FormatLoadException, "This looks like an IRSTLM iARPA file. You need an ARPA file. Run\n compile-lm --text yes " << in.FileName() << " " << in.FileName() << ".arpa\nfirst.");
UTIL_THROW(FormatLoadException, "first non-empty line was \"" << line << "\" not \\data\\.");
}
while (!IsEntirelyWhiteSpace(line = in.ReadLine())) {