#include "lm/builder/adjust_counts.hh"

#include "lm/builder/ngram_stream.hh"
#include "util/scoped.hh"

#include <boost/thread/thread.hpp>
#define BOOST_TEST_MODULE AdjustCounts
#include <boost/test/unit_test.hpp>

// Included directly rather than relying on whatever the project/Boost headers
// happen to pull in: this file calls std::memcpy and uses std::vector,
// std::size_t and the fixed-width integer typedefs.
#include <cstddef>
#include <cstring>
#include <vector>

#include <stdint.h>

namespace lm { namespace builder { namespace {

/* Stream sink that appends the valid bytes of every block it receives to a
 * single growing buffer, so the test can inspect the records afterwards.
 */
class KeepCopy {
  public:
    KeepCopy() : size_(0) {}

    // Consume blocks from position until the link tests false, copying each
    // block's ValidSize() bytes onto the end of the buffer.
    void Run(const util::stream::ChainPosition &position) {
      for (util::stream::Link link(position); link; ++link) {
        const std::size_t chunk = link->ValidSize();
        // Grow first; Get() is re-read afterwards because the buffer may move.
        mem_.call_realloc(size_ + chunk);
        std::memcpy(Get() + size_, link->Get(), chunk);
        size_ += chunk;
      }
    }

    // Start of the accumulated bytes.
    uint8_t *Get() { return static_cast<uint8_t*>(mem_.get()); }

    // Number of bytes accumulated so far.
    std::size_t Size() const { return size_; }

  private:
    util::scoped_malloc mem_;
    std::size_t size_;
};

// One hard-coded input record: four word ids plus a count.
struct Gram4 {
  WordIndex ids[4];
  uint64_t count;
};

// Model order used throughout the test; must match the width of Gram4::ids.
const unsigned kTestOrder = 4;

/* Stream source that writes a fixed set of 4-grams (ids and count) into the
 * chain and then poisons the stream.
 */
class WriteInput {
  public:
    void Run(const util::stream::ChainPosition &position) {
      NGramStream input(position);
      const Gram4 grams[] = {
        {{0,0,0,0},10},
        {{0,0,3,0},3},
        // bos
        {{1,1,1,2},5},
        {{0,0,3,2},5},
      };
      const std::size_t gram_count = sizeof(grams) / sizeof(grams[0]);
      for (std::size_t i = 0; i < gram_count; ++i, ++input) {
        // Copy exactly the id array; sizeof on the member keeps this in step
        // with Gram4 instead of repeating its length.
        std::memcpy(input->begin(), grams[i].ids, sizeof(grams[i].ids));
        input->Count() = grams[i].count;
      }
      input.Poison();
    }
};

/* Feeds the fixed 4-grams through AdjustCounts with bad_action = THROW_UP and
 * expects BadDiscountException, then checks the counts and the records that
 * were captured for orders 1 and 2.
 */
BOOST_AUTO_TEST_CASE(Simple) {
  KeepCopy outputs[kTestOrder];
  std::vector<uint64_t> counts;
  std::vector<Discount> discount;
  {
    util::stream::ChainConfig config;
    config.total_memory = 100;
    config.block_count = 1;
    util::stream::Chains chains(kTestOrder);
    // One chain per order, each sized for that order's record.
    for (unsigned i = 0; i < kTestOrder; ++i) {
      config.entry_size = NGram::TotalSize(i + 1);
      chains.push_back(config);
    }

    // Only the highest-order chain gets input.
    chains[kTestOrder - 1] >> WriteInput();
    // Positions are taken before the capturing sinks are attached.
    util::stream::ChainPositions for_adjust(chains);
    for (unsigned i = 0; i < kTestOrder; ++i) {
      // boost::ref so the chain fills the local KeepCopy rather than a copy.
      chains[i] >> boost::ref(outputs[i]);
    }
    chains >> util::stream::kRecycle;

    std::vector<uint64_t> counts_pruned(kTestOrder);
    std::vector<uint64_t> prune_thresholds(kTestOrder);
    DiscountConfig discount_config;
    discount_config.fallback = Discount();
    discount_config.bad_action = THROW_UP;
    BOOST_CHECK_THROW(
        AdjustCounts(prune_thresholds, counts, counts_pruned, discount_config, discount).Run(for_adjust),
        BadDiscountException);
  }
  BOOST_REQUIRE_EQUAL(4UL, counts.size());
  BOOST_CHECK_EQUAL(4UL, counts[0]);
  // These are no longer set because the discounts are bad.
/*  BOOST_CHECK_EQUAL(4UL, counts[1]);
  BOOST_CHECK_EQUAL(3UL, counts[2]);
  BOOST_CHECK_EQUAL(3UL, counts[3]);*/

  // Four unigram records are expected; the 4 here is a record count, not the
  // model order.
  BOOST_REQUIRE_EQUAL(NGram::TotalSize(1) * 4, outputs[0].Size());
  NGram uni(outputs[0].Get(), 1);
  BOOST_CHECK_EQUAL(kUNK, *uni.begin());
  BOOST_CHECK_EQUAL(0ULL, uni.Count());
  uni.NextInMemory();
  BOOST_CHECK_EQUAL(kBOS, *uni.begin());
  BOOST_CHECK_EQUAL(0ULL, uni.Count());
  uni.NextInMemory();
  BOOST_CHECK_EQUAL(0UL, *uni.begin());
  BOOST_CHECK_EQUAL(2ULL, uni.Count());
  uni.NextInMemory();
  BOOST_CHECK_EQUAL(2ULL, uni.Count());
  BOOST_CHECK_EQUAL(2UL, *uni.begin());

  // Four bigram records are expected; only the first one is inspected.
  BOOST_REQUIRE_EQUAL(NGram::TotalSize(2) * 4, outputs[1].Size());
  NGram bi(outputs[1].Get(), 2);
  BOOST_CHECK_EQUAL(0UL, *bi.begin());
  BOOST_CHECK_EQUAL(0UL, *(bi.begin() + 1));
  BOOST_CHECK_EQUAL(1ULL, bi.Count());
  bi.NextInMemory();
}

}}} // namespaces