| author | Kenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local> | 2011-05-20 16:19:04 -0400 | 
|---|---|---|
| committer | Kenneth Heafield <kheafiel@cluster12.lti.ece.cmu.local> | 2011-05-20 16:19:04 -0400 | 
| commit | 461c2670efb0968ccc6789ff0c9ca6f88ab31e80 (patch) | |
| tree | 6b344dcf320674213fd449e6a4915236ad78c29f /klm/lm/read_arpa.hh | |
| parent | 0e7b303879baf95a8167194ad7c75ef738e79f15 (diff) | |
kenlm update including being nicer to NFS
Diffstat (limited to 'klm/lm/read_arpa.hh')
| -rw-r--r-- | klm/lm/read_arpa.hh | 33 |
|---|---|---|

1 file changed, 27 insertions, 6 deletions
diff --git a/klm/lm/read_arpa.hh b/klm/lm/read_arpa.hh
index 4953d40e..ab996bde 100644
--- a/klm/lm/read_arpa.hh
+++ b/klm/lm/read_arpa.hh
@@ -22,10 +22,26 @@ void ReadEnd(util::FilePiece &in);
 
 extern const bool kARPASpaces[256];
 
-template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff *unigrams) {
+// Positive log probability warning.
+class PositiveProbWarn {
+  public:
+    PositiveProbWarn() : action_(THROW_UP) {}
+
+    explicit PositiveProbWarn(WarningAction action) : action_(action) {}
+
+    void Warn(float prob);
+
+  private:
+    WarningAction action_;
+};
+
+template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) {
   try {
     float prob = f.ReadFloat();
-    if (prob > 0) UTIL_THROW(FormatLoadException, "Positive probability " << prob);
+    if (prob > 0.0) {
+      warn.Warn(prob);
+      prob = 0.0;
+    }
     if (f.get() != '\t') UTIL_THROW(FormatLoadException, "Expected tab after probability");
     ProbBackoff &value = unigrams[vocab.Insert(f.ReadDelimited(kARPASpaces))];
     value.prob = prob;
@@ -36,18 +52,23 @@ template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff
   }
 }
 
-template <class Voc> void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, ProbBackoff *unigrams) {
+// Return true if a positive log probability came out.
+template <class Voc> void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) {
   ReadNGramHeader(f, 1);
   for (std::size_t i = 0; i < count; ++i) {
-    Read1Gram(f, vocab, unigrams);
+    Read1Gram(f, vocab, unigrams, warn);
   }
   vocab.FinishedLoading(unigrams);
 }
 
-template <class Voc, class Weights> void ReadNGram(util::FilePiece &f, const unsigned char n, const Voc &vocab, WordIndex *const reverse_indices, Weights &weights) {
+// Return true if a positive log probability came out.
+template <class Voc, class Weights> void ReadNGram(util::FilePiece &f, const unsigned char n, const Voc &vocab, WordIndex *const reverse_indices, Weights &weights, PositiveProbWarn &warn) {
   try {
     weights.prob = f.ReadFloat();
-    if (weights.prob > 0) UTIL_THROW(FormatLoadException, "Positive probability " << weights.prob);
+    if (weights.prob > 0.0) {
+      warn.Warn(weights.prob);
+      weights.prob = 0.0;
+    }
     for (WordIndex *vocab_out = reverse_indices + n - 1; vocab_out >= reverse_indices; --vocab_out) {
       *vocab_out = vocab.Index(f.ReadDelimited(kARPASpaces));
     }
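
The change above threads a `PositiveProbWarn` object through `Read1Gram`, `Read1Grams`, and `ReadNGram` so that an ARPA entry with a positive log probability can be reported and clamped to 0.0 instead of unconditionally throwing `FormatLoadException` as before. Only the class declaration appears in this header; the body of `Warn` is defined elsewhere and is not part of this diff. The sketch below is a hypothetical illustration of how such a policy object could behave, assuming `WarningAction` also offers non-throwing values (named `COMPLAIN` and `SILENT` here purely for illustration); it is not the commit's actual implementation.

```cpp
// Hypothetical sketch, not the code from this commit: one way a configurable
// positive-probability policy like PositiveProbWarn could work.
#include <iostream>
#include <stdexcept>

// Stand-in for kenlm's WarningAction; only THROW_UP appears in the diff above,
// the other values are assumptions for illustration.
enum WarningAction { THROW_UP, COMPLAIN, SILENT };

class PositiveProbWarnSketch {
  public:
    explicit PositiveProbWarnSketch(WarningAction action = THROW_UP)
      : action_(action), warned_(false) {}

    // Read1Gram/ReadNGram would call this when log10 p > 0, then clamp p to 0.0.
    void Warn(float prob) {
      switch (action_) {
        case THROW_UP:
          // Old behavior: a positive log probability aborts the load.
          throw std::runtime_error("Positive log probability in ARPA file");
        case COMPLAIN:
          // Report the first offender only, then let loading continue.
          if (!warned_) {
            std::cerr << "Warning: positive log probability " << prob
                      << "; substituting 0.0 and continuing." << std::endl;
            warned_ = true;
          }
          break;
        case SILENT:
          break;
      }
    }

  private:
    WarningAction action_;
    bool warned_;
};
```

Because the readers take `PositiveProbWarn &warn` by reference, a single instance constructed by the loader can be passed into `Read1Grams` and every subsequent `ReadNGram` call, so the chosen action (and any warn-once state) applies uniformly across all n-gram orders.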
