diff options
author | Kenneth Heafield <github@kheafield.com> | 2013-01-18 17:12:51 +0000 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2013-01-18 17:12:51 +0000 |
commit | d884099e0db8b4510847ec106b59ef7dca3c245b (patch) | |
tree | b45a3f17eb002e224a7b728e0f985a15e2503196 /klm/lm/builder/adjust_counts.hh | |
parent | bae5fe99037ae7e101953ad0df118127191c711c (diff) |
KenLM dffafbf with lmplz source (but not built)
Diffstat (limited to 'klm/lm/builder/adjust_counts.hh')
-rw-r--r-- | klm/lm/builder/adjust_counts.hh | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/klm/lm/builder/adjust_counts.hh b/klm/lm/builder/adjust_counts.hh new file mode 100644 index 00000000..f38ff79d --- /dev/null +++ b/klm/lm/builder/adjust_counts.hh @@ -0,0 +1,44 @@ +#ifndef LM_BUILDER_ADJUST_COUNTS__ +#define LM_BUILDER_ADJUST_COUNTS__ + +#include "lm/builder/discount.hh" +#include "util/exception.hh" + +#include <vector> + +#include <stdint.h> + +namespace lm { +namespace builder { + +class ChainPositions; + +class BadDiscountException : public util::Exception { + public: + BadDiscountException() throw(); + ~BadDiscountException() throw(); +}; + +/* Compute adjusted counts. + * Input: unique suffix sorted N-grams (and just the N-grams) with raw counts. + * Output: [1,N]-grams with adjusted counts. + * [1,N)-grams are in suffix order + * N-grams are in undefined order (they're going to be sorted anyway). + */ +class AdjustCounts { + public: + AdjustCounts(std::vector<uint64_t> &counts, std::vector<Discount> &discounts) + : counts_(counts), discounts_(discounts) {} + + void Run(const ChainPositions &positions); + + private: + std::vector<uint64_t> &counts_; + std::vector<Discount> &discounts_; +}; + +} // namespace builder +} // namespace lm + +#endif // LM_BUILDER_ADJUST_COUNTS__ + |