From e2d7d88579e291ab2120b813270e51f6274025f6 Mon Sep 17 00:00:00 2001 From: redpony Date: Wed, 10 Nov 2010 22:44:30 +0000 Subject: forgotten files git-svn-id: https://ws10smt.googlecode.com/svn/trunk@708 ec762483-ff6d-05da-a07a-a48fb63a330f --- klm/lm/read_arpa.hh | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 klm/lm/read_arpa.hh (limited to 'klm/lm/read_arpa.hh') diff --git a/klm/lm/read_arpa.hh b/klm/lm/read_arpa.hh new file mode 100644 index 00000000..cabdb195 --- /dev/null +++ b/klm/lm/read_arpa.hh @@ -0,0 +1,64 @@ +#ifndef LM_READ_ARPA__ +#define LM_READ_ARPA__ + +#include "lm/lm_exception.hh" +#include "lm/word_index.hh" +#include "lm/weights.hh" +#include "util/file_piece.hh" + +#include +#include +#include + +namespace lm { + +void ReadARPACounts(util::FilePiece &in, std::vector &number); +void ReadARPACounts(std::istream &in, std::vector &number); +void ReadNGramHeader(util::FilePiece &in, unsigned int length); +void ReadNGramHeader(std::istream &in, unsigned int length); + +void ReadBackoff(util::FilePiece &in, Prob &weights); +void ReadBackoff(util::FilePiece &in, ProbBackoff &weights); + +void ReadEnd(util::FilePiece &in); +void ReadEnd(std::istream &in); + +template void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff *unigrams) { + try { + float prob = f.ReadFloat(); + if (prob > 0) UTIL_THROW(FormatLoadException, "Positive probability " << prob); + if (f.get() != '\t') UTIL_THROW(FormatLoadException, "Expected tab after probability"); + ProbBackoff &value = unigrams[vocab.Insert(f.ReadDelimited())]; + value.prob = prob; + ReadBackoff(f, value); + } catch(util::Exception &e) { + e << " in the 1-gram at byte " << f.Offset(); + throw; + } +} + +template void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, ProbBackoff *unigrams) { + ReadNGramHeader(f, 1); + for (std::size_t i = 0; i < count; ++i) { + Read1Gram(f, vocab, unigrams); + } + vocab.FinishedLoading(unigrams); +} + +template void ReadNGram(util::FilePiece &f, const unsigned char n, const Voc &vocab, WordIndex *const reverse_indices, Weights &weights) { + try { + weights.prob = f.ReadFloat(); + if (weights.prob > 0) UTIL_THROW(FormatLoadException, "Positive probability " << weights.prob); + for (WordIndex *vocab_out = reverse_indices + n - 1; vocab_out >= reverse_indices; --vocab_out) { + *vocab_out = vocab.Index(f.ReadDelimited()); + } + ReadBackoff(f, weights); + } catch(util::Exception &e) { + e << " in the " << n << "-gram at byte " << f.Offset(); + throw; + } +} + +} // namespace lm + +#endif // LM_READ_ARPA__ -- cgit v1.2.3