author     Patrick Simianer <p@simianer.de>   2011-11-14 01:20:50 +0100
committer  Patrick Simianer <p@simianer.de>   2011-11-14 01:20:50 +0100
commit     bb602ade9c970272f281cd8a42b643cc09a9fa29 (patch)
tree       5b24a9eceae7e64ca27c3ea202882b7e57195883
parent     f99ba621e5bd10b069b453d11b3b4981dc482b6c (diff)

l1 regularization
Diffstat
-rw-r--r--  klm/lm/binary_format.hh    10
-rw-r--r--  klm/lm/blank.hh             4
-rw-r--r--  klm/lm/build_binary.cc     83
-rw-r--r--  klm/lm/config.cc            2
-rw-r--r--  klm/lm/config.hh            5
-rw-r--r--  klm/lm/model.cc            25
-rw-r--r--  klm/lm/model.hh            21
-rw-r--r--  klm/lm/model_test.cc       13
-rw-r--r--  klm/lm/quantize.cc         84
-rw-r--r--  klm/lm/quantize.hh        207
-rw-r--r--  klm/lm/search_hashed.cc    38
-rw-r--r--  klm/lm/search_hashed.hh   122
-rw-r--r--  klm/lm/search_trie.cc     126
-rw-r--r--  klm/lm/search_trie.hh     132
-rw-r--r--  klm/lm/trie.cc            123
-rw-r--r--  klm/lm/trie.hh             33
-rw-r--r--  klm/lm/vocab.cc            18
-rw-r--r--  klm/lm/vocab.hh            35

20 files changed, 788 insertions, 311 deletions
diff --git a/klm/lm/Makefile.am b/klm/lm/Makefile.am
index 61d98d97..395494bc 100644
--- a/klm/lm/Makefile.am
+++ b/klm/lm/Makefile.am
@@ -15,6 +15,7 @@ libklm_a_SOURCES = \
binary_format.cc \
config.cc \
lm_exception.cc \
+ quantize.cc \
model.cc \
ngram_query.cc \
read_arpa.cc \
diff --git a/klm/lm/binary_format.cc b/klm/lm/binary_format.cc
index 34d9ffca..92b1008b 100644
--- a/klm/lm/binary_format.cc
+++ b/klm/lm/binary_format.cc
@@ -80,6 +80,14 @@ void WriteHeader(void *to, const Parameters &params) {
} // namespace
+void SeekOrThrow(int fd, off_t off) {
+ if ((off_t)-1 == lseek(fd, off, SEEK_SET)) UTIL_THROW(util::ErrnoException, "Seek failed");
+}
+
+void AdvanceOrThrow(int fd, off_t off) {
+ if ((off_t)-1 == lseek(fd, off, SEEK_CUR)) UTIL_THROW(util::ErrnoException, "Seek failed");
+}
+
uint8_t *SetupJustVocab(const Config &config, uint8_t order, std::size_t memory_size, Backing &backing) {
if (config.write_mmap) {
std::size_t total = TotalHeaderSize(order) + memory_size;
@@ -156,7 +164,7 @@ bool IsBinaryFormat(int fd) {
}
void ReadHeader(int fd, Parameters &out) {
- if ((off_t)-1 == lseek(fd, sizeof(Sanity), SEEK_SET)) UTIL_THROW(util::ErrnoException, "Seek failed in binary file");
+ SeekOrThrow(fd, sizeof(Sanity));
ReadLoop(fd, &out.fixed, sizeof(out.fixed));
if (out.fixed.probing_multiplier < 1.0)
UTIL_THROW(FormatLoadException, "Binary format claims to have a probing multiplier of " << out.fixed.probing_multiplier << " which is < 1.0.");
@@ -173,6 +181,10 @@ void MatchCheck(ModelType model_type, const Parameters &params) {
}
}
+void SeekPastHeader(int fd, const Parameters &params) {
+ SeekOrThrow(fd, TotalHeaderSize(params.counts.size()));
+}
+
uint8_t *SetupBinary(const Config &config, const Parameters &params, std::size_t memory_size, Backing &backing) {
const off_t file_size = util::SizeFile(backing.file.get());
// The header is smaller than a page, so we have to map the whole header as well.
@@ -186,8 +198,7 @@ uint8_t *SetupBinary(const Config &config, const Parameters &params, std::size_t
UTIL_THROW(FormatLoadException, "The decoder requested all the vocabulary strings, but this binary file does not have them. You may need to rebuild the binary file with an updated version of build_binary.");
if (config.enumerate_vocab) {
- if ((off_t)-1 == lseek(backing.file.get(), total_map, SEEK_SET))
- UTIL_THROW(util::ErrnoException, "Failed to seek in binary file to vocab words");
+ SeekOrThrow(backing.file.get(), total_map);
}
return reinterpret_cast<uint8_t*>(backing.search.get()) + TotalHeaderSize(params.counts.size());
}
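
The two hunks above fold the repeated inline lseek error checks into SeekOrThrow and AdvanceOrThrow so each call site reads as a single statement. A minimal standalone sketch of the same wrapper pattern, with std::runtime_error standing in for util::ErrnoException (an assumption; the real helpers use KenLM's util library):

    #include <sys/types.h>
    #include <unistd.h>
    #include <cerrno>
    #include <cstring>
    #include <stdexcept>
    #include <string>

    // Centralize the (off_t)-1 check so callers never repeat it.
    void SeekOrThrowSketch(int fd, off_t off) {
      if (lseek(fd, off, SEEK_SET) == (off_t)-1)
        throw std::runtime_error(std::string("Seek failed: ") + std::strerror(errno));
    }

    // Same idea, but relative to the current position (SEEK_CUR).
    void AdvanceOrThrowSketch(int fd, off_t off) {
      if (lseek(fd, off, SEEK_CUR) == (off_t)-1)
        throw std::runtime_error(std::string("Advance failed: ") + std::strerror(errno));
    }
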
diff --git a/klm/lm/binary_format.hh b/klm/lm/binary_format.hh
index 1fc71be4..2b32b450 100644
--- a/klm/lm/binary_format.hh
+++ b/klm/lm/binary_format.hh
@@ -16,7 +16,7 @@
namespace lm {
namespace ngram {
-typedef enum {HASH_PROBING=0, HASH_SORTED=1, TRIE_SORTED=2} ModelType;
+typedef enum {HASH_PROBING=0, HASH_SORTED=1, TRIE_SORTED=2, QUANT_TRIE_SORTED=3} ModelType;
/*Inspect a file to determine if it is a binary lm. If not, return false.
* If so, return true and set recognized to the type. This is the only API in
@@ -48,6 +48,10 @@ struct Backing {
util::scoped_memory search;
};
+void SeekOrThrow(int fd, off_t off);
+// Seek forward
+void AdvanceOrThrow(int fd, off_t off);
+
// Create just enough of a binary file to write vocabulary to it.
uint8_t *SetupJustVocab(const Config &config, uint8_t order, std::size_t memory_size, Backing &backing);
// Grow the binary file for the search data structure and set backing.search, returning the memory address where the search data structure should begin.
@@ -65,6 +69,8 @@ void ReadHeader(int fd, Parameters &params);
void MatchCheck(ModelType model_type, const Parameters &params);
+void SeekPastHeader(int fd, const Parameters &params);
+
uint8_t *SetupBinary(const Config &config, const Parameters &params, std::size_t memory_size, Backing &backing);
void ComplainAboutARPA(const Config &config, ModelType model_type);
@@ -83,6 +89,8 @@ template <class To> void LoadLM(const char *file, const Config &config, To &to)
// Replace the run-time configured probing_multiplier with the one in the file.
Config new_config(config);
new_config.probing_multiplier = params.fixed.probing_multiplier;
+ detail::SeekPastHeader(backing.file.get(), params);
+ To::UpdateConfigFromBinary(backing.file.get(), params.counts, new_config);
std::size_t memory_size = To::Size(params.counts, new_config);
uint8_t *start = detail::SetupBinary(new_config, params, memory_size, backing);
to.InitializeFromBinary(start, params, new_config, backing.file.get());
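
The two added lines in LoadLM give each model type a hook between header parsing and memory sizing: the file cursor is positioned just past the header, then To::UpdateConfigFromBinary may read structure-specific settings (for the quantized trie, the stored bit widths) back into the Config before To::Size is consulted. A compilable sketch of why that ordering matters; every name here is a stand-in, not a real KenLM type:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct SketchConfig { std::uint8_t prob_bits, backoff_bits; };

    struct QuantModelSketch {
      // In the real code this reads bit widths out of the binary file;
      // hard-coded here so the sketch stays self-contained.
      static void UpdateConfigFromBinary(int /*fd*/, const std::vector<std::uint64_t> &,
                                         SketchConfig &config) {
        config.prob_bits = 8;
        config.backoff_bits = 8;
      }
      // Memory use depends on the (possibly just-updated) bit widths,
      // which is why the hook must run before Size().
      static std::size_t Size(const std::vector<std::uint64_t> &counts,
                              const SketchConfig &config) {
        std::size_t total = 0;
        for (std::size_t i = 0; i < counts.size(); ++i)
          total += counts[i] * (config.prob_bits + config.backoff_bits) / 8;
        return total;
      }
    };
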
diff --git a/klm/lm/blank.hh b/klm/lm/blank.hh
index 4615a09e..162411a9 100644
--- a/klm/lm/blank.hh
+++ b/klm/lm/blank.hh
@@ -22,6 +22,8 @@ namespace ngram {
*/
const float kNoExtensionBackoff = -0.0;
const float kExtensionBackoff = 0.0;
+const uint64_t kNoExtensionQuant = 0;
+const uint64_t kExtensionQuant = 1;
inline void SetExtension(float &backoff) {
if (backoff == kNoExtensionBackoff) backoff = kExtensionBackoff;
@@ -47,6 +49,8 @@ inline bool HasExtension(const float &backoff) {
*/
const float kBlankProb = -std::numeric_limits<float>::infinity();
const float kBlankBackoff = kNoExtensionBackoff;
+const uint32_t kBlankProbQuant = 0;
+const uint32_t kBlankBackoffQuant = 0;
} // namespace ngram
} // namespace lm
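
kNoExtensionBackoff (-0.0) and kExtensionBackoff (0.0) exploit the fact that IEEE 754 has two zeros: a zero backoff carries one extra flag, whether the n-gram has any extensions, in its sign bit at no storage cost. Since -0.0f == 0.0f under operator==, the test has to compare raw bits. A minimal sketch of that check, assuming IEEE 754 floats (the real HasExtension lives in this header):

    #include <cstdint>
    #include <cstring>

    // Distinguish -0.0f ("no extension") from +0.0f ("extends") by bit
    // pattern, because the two compare equal as floats.
    inline bool HasExtensionSketch(float backoff) {
      const float kNoExtension = -0.0f;
      std::uint32_t lhs, rhs;
      std::memcpy(&lhs, &backoff, sizeof(lhs));
      std::memcpy(&rhs, &kNoExtension, sizeof(rhs));
      return lhs != rhs;  // any nonzero value, or +0.0f, means "extends"
    }

The new kNoExtensionQuant/kExtensionQuant constants carry the same one-bit distinction over into the quantized encoding.
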
diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc
index 91ad2fb9..4552c419 100644
--- a/klm/lm/build_binary.cc
+++ b/klm/lm/build_binary.cc
@@ -15,22 +15,21 @@ namespace ngram {
namespace {
void Usage(const char *name) {
- std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-n] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [type] input.arpa output.mmap\n\n"
+ std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-i] [-p probing_multiplier] [-t trie_temporary] [-m trie_building_megabytes] [-q bits] [-b bits] [type] input.arpa output.mmap\n\n"
"-u sets the default log10 probability for <unk> if the ARPA file does not have\n"
"one.\n"
"-s allows models to be built even if they do not have <s> and </s>.\n"
"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n"
-"type is one of probing, trie, or sorted:\n\n"
+"type is either probing or trie:\n\n"
"probing uses a probing hash table. It is the fastest but uses the most memory.\n"
"-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n"
"trie is a straightforward trie with bit-level packing. It uses the least\n"
"memory and is still faster than SRI or IRST. Building the trie format uses an\n"
"on-disk sort to save memory.\n"
"-t is the temporary directory prefix. Default is the output file name.\n"
-"-m limits memory use for sorting. Measured in MB. Default is 1024MB.\n\n"
-/*"sorted is like probing but uses a sorted uniform map instead of a hash table.\n"
-"It uses more memory than trie and is also slower, so there's no real reason to\n"
-"use it.\n\n"*/
+"-m limits memory use for sorting. Measured in MB. Default is 1024MB.\n"
+"-q turns quantization on and sets the number of bits (e.g. -q 8).\n"
+"-b sets backoff quantization bits. Requires -q and defaults to that value.\n\n"
"See http://kheafield.com/code/kenlm/benchmark/ for data structure benchmarks.\n"
"Passing only an input file will print memory usage of each data structure.\n"
"If the ARPA file does not have <unk>, -u sets <unk>'s probability; default 0.0.\n";
@@ -51,19 +50,53 @@ unsigned long int ParseUInt(const char *from) {
return ret;
}
+uint8_t ParseBitCount(const char *from) {
+ unsigned long val = ParseUInt(from);
+ if (val > 25) {
+ util::ParseNumberException e(from);
+ e << " bit counts are limited to 256.";
+ }
+ return val;
+}
+
void ShowSizes(const char *file, const lm::ngram::Config &config) {
std::vector<uint64_t> counts;
util::FilePiece f(file);
lm::ReadARPACounts(f, counts);
- std::size_t probing_size = ProbingModel::Size(counts, config);
- // probing is always largest so use it to determine number of columns.
- long int length = std::max<long int>(5, lrint(ceil(log10(probing_size))));
+ std::size_t sizes[3];
+ sizes[0] = ProbingModel::Size(counts, config);
+ sizes[1] = TrieModel::Size(counts, config);
+ sizes[2] = QuantTrieModel::Size(counts, config);
+ std::size_t max_length = *std::max_element(sizes, sizes + 3);
+ std::size_t min_length = *std::min_element(sizes, sizes + 3);
+ std::size_t divide;
+ char prefix;
+ if (min_length < (1 << 10) * 10) {
+ prefix = ' ';
+ divide = 1;
+ } else if (min_length < (1 << 20) * 10) {
+ prefix = 'k';
+ divide = 1 << 10;
+ } else if (min_length < (1ULL << 30) * 10) {
+ prefix = 'M';
+ divide = 1 << 20;
+ } else {
+ prefix = 'G';
+ divide = 1 << 30;
+ }
+ long int length = std::max<long int>(2, lrint(ceil(log10(max_length / divide))));
std::cout << "Memory estimate:\ntype ";
// right align bytes.
- for (long int i = 0; i < length - 5; ++i) std::cout << ' ';
- std::cout << "bytes\n"
- "probing " << std::setw(length) << probing_size << " assuming -p " << config.probing_multiplier << "\n"
- "trie " << std::setw(length) << TrieModel::Size(counts, config) << "\n";
+ for (long int i = 0; i < length - 2; ++i) std::cout << ' ';
+ std::cout << prefix << "B\n"
+ "probing " << std::setw(length) << (sizes[0] / divide) << " assuming -p " << config.probing_multiplier << "\n"
+ "trie " << std::setw(length) << (sizes[1] / divide) << " without quantization\n"
+ "trie " << std::setw(length) << (sizes[2] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n";
+}
+
+void ProbingQuantizationUnsupported() {
+ std::cerr << "Quantization is only implemented in the trie data structure." << std::endl;
+ exit(1);
}
} // namespace ngram
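
The rewritten ShowSizes prints all three estimates in one shared unit, chosen so the smallest estimate still shows at least two digits (min_length / divide >= 10). The selection rule as a standalone sketch:

    #include <cstddef>

    // Step up from bytes through kB/MB to GB only while the smallest
    // value would still print with at least two digits in that unit.
    inline void PickUnit(std::size_t min_size, char &prefix, std::size_t &divide) {
      if (min_size < (1 << 10) * 10)         { prefix = ' '; divide = 1; }
      else if (min_size < (1 << 20) * 10)    { prefix = 'k'; divide = 1 << 10; }
      else if (min_size < (1ULL << 30) * 10) { prefix = 'M'; divide = 1 << 20; }
      else                                   { prefix = 'G'; divide = 1 << 30; }
    }

For example, estimates of 400 MB, 180 MB, and 90 MB select the 'M' prefix, since the smallest (90 MB) is above 10 MB but below 10 GB.
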
@@ -73,11 +106,21 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) {
int main(int argc, char *argv[]) {
using namespace lm::ngram;
+ bool quantize = false, set_backoff_bits = false;
try {
lm::ngram::Config config;
int opt;
- while ((opt = getopt(argc, argv, "siu:p:t:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "siu:p:t:m:q:b:")) != -1) {
switch(opt) {
+ case 'q':
+ config.prob_bits = ParseBitCount(optarg);
+ if (!set_backoff_bits) config.backoff_bits = config.prob_bits;
+ quantize = true;
+ break;
+ case 'b':
+ config.backoff_bits = ParseBitCount(optarg);
+ set_backoff_bits = true;
+ break;
case 'u':
config.unknown_missing_logprob = ParseFloat(optarg);
break;
@@ -100,19 +143,29 @@ int main(int argc, char *argv[]) {
Usage(argv[0]);
}
}
+ if (!quantize && set_backoff_bits) {
+ std::cerr << "You specified backoff quantization (-b) but not probability quantization (-q)" << std::endl;
+ abort();
+ }
if (optind + 1 == argc) {
ShowSizes(argv[optind], config);
} else if (optind + 2 == argc) {
config.write_mmap = argv[optind + 1];
+ if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
ProbingModel(argv[optind], config);
} else if (optind + 3 == argc) {
const char *model_type = argv[optind];
const char *from_file = argv[optind + 1];
config.write_mmap = argv[optind + 2];
if (!strcmp(model_type, "probing")) {
+ if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
ProbingModel(from_file, config);
} else if (!strcmp(model_type, "trie")) {
- TrieModel(from_file, config);
+ if (quantize) {
+ QuantTrieModel(from_file, config);
+ } else {
+ TrieModel(from_file, config);
+ }
} else {
Usage(argv[0]);
}
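
With the new options wired through getopt, quantization is opt-in per build. An illustrative invocation (file names hypothetical):

    build_binary -q 8 trie input.arpa output.mmap

This builds a QUANT_TRIE_SORTED binary with 8-bit probabilities; since -b defaults to the -q value, backoffs are also quantized to 8 bits unless -b says otherwise. Combining -q or -b with the probing structure exits through ProbingQuantizationUnsupported.
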
diff --git a/klm/lm/config.cc b/klm/lm/config.cc
index cee8fce2..08e1af5c 100644
--- a/klm/lm/config.cc
+++ b/klm/lm/config.cc
@@ -18,6 +18,8 @@ Config::Config() :
arpa_complain(ALL),
write_mmap(NULL),
include_vocab(true),
+ prob_bits(8),
+ backoff_bits(8),
load_method(util::POPULATE_OR_READ) {}
} // namespace ngram
diff --git a/klm/lm/config.hh b/klm/lm/config.hh
index 6c7fe39b..dcc7cf35 100644
--- a/klm/lm/config.hh
+++ b/klm/lm/config.hh
@@ -71,6 +71,11 @@ struct Config {
// Include the vocab in the binary file? Only effective if write_mmap != NULL.
bool include_vocab;
+ // Quantization options. Only effective for QuantTrieModel. One value is
+ // reserved for each of prob and backoff, so 2^bits - 1 buckets will be used
+ // to quantize.
+ uint8_t prob_bits, backoff_bits;
+
// ONLY EFFECTIVE WHEN READING BINARY
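
The comment above pins down the encoding budget: with b bits, one code point is reserved, leaving 2^b - 1 buckets. A toy linear quantizer showing just that arithmetic (the quantizer this commit adds builds its bins from the model's actual values rather than using a fixed linear rule, so this is illustrative only):

    #include <cstdint>

    // With `bits` bits (assumed <= 25, per build_binary's check), keep
    // code 0 reserved and spread [lo, hi] over the remaining
    // (1 << bits) - 1 codes.
    inline std::uint32_t QuantizeSketch(float x, float lo, float hi, std::uint8_t bits) {
      const std::uint32_t buckets = (1u << bits) - 1;
      if (x <= lo) return 1;
      if (x >= hi) return buckets;
      return 1 + static_cast<std::uint32_t>((x - lo) / (hi - lo) * (buckets - 1));
    }

With -q 8 this yields 255 usable buckets plus the one reserved code.
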
diff --git a/klm/lm/model.cc b/klm/lm/model.cc
index f0579c0c..a1d10b3d 100644
--- a/klm/lm/model.cc
+++ b/klm/lm/model.cc
@@ -44,17 +44,13 @@ template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::Ge
begin_sentence.backoff_[0] = search_.unigram.Lookup(begin_sentence.history_[0]).backoff;
State null_context = State();
null_context.valid_length_ = 0;
- P::Init(begin_sentence, null_context, vocab_, search_.middle.size() + 2);
+ P::Init(begin_sentence, null_context, vocab_, search_.MiddleEnd() - search_.MiddleBegin() + 2);
}
template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromBinary(void *start, const Parameters &params, const Config &config, int fd) {
SetupMemory(start, params.counts, config);
vocab_.LoadedBinary(fd, config.enumerate_vocab);
- search_.unigram.LoadedBinary();
- for (typename std::vector<Middle>::iterator i = search_.middle.begin(); i != search_.middle.end(); ++i) {
- i->LoadedBinary();
- }
- search_.longest.LoadedBinary();
+ search_.LoadedBinary();
}
template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromARPA(const char *file, const Config &config) {
@@ -116,8 +112,9 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search,
}
float backoff;
// i is the order of the backoff we're looking for.
- for (const WordIndex *i = context_rbegin + start - 1; i < context_rend; ++i) {
- if (!search_.LookupMiddleNoProb(search_.middle[i - context_rbegin - 1], *i, backoff, node)) break;
+ const Middle *mid_iter = search_.MiddleBegin() + start - 2;
+ for (const WordIndex *i = context_rbegin + start - 1; i < context_rend; ++i, ++mid_iter) {
+ if (!search_.LookupMiddleNoProb(*mid_iter, *i, backoff, node)) break;
ret.prob += backoff;
}
return ret;
@@ -135,7 +132,7 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
search_.LookupUnigram(*context_rbegin, ignored_prob, out_state.backoff_[0], node);
out_state.valid_length_ = HasExtension(out_state.backoff_[0]) ? 1 : 0;
float *backoff_out = out_state.backoff_ + 1;
- const typename Search::Middle *mid = &*search_.middle.begin();
+ const typename Search::Middle *mid = search_.MiddleBegin();
for (const WordIndex *i = context_rbegin + 1; i < context_rend; ++i, ++backoff_out, ++mid) {
if (!search_.LookupMiddleNoProb(*mid, *i, *backoff_out, node)) {
std::copy(context_rbegin, context_rbegin + out_state.valid_length_, out_state.history_);
@@ -183,7 +180,7 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search,
// Ok now we know that the bigram contains known words. Start by looking it up.
const WordIndex *hist_iter = context_rbegin;
- typename std::vector<Middle>::const_iterator mid_iter = search_.middle.begin();
+ const typename Search::Middle *mid_iter = search_.MiddleBegin();
for (; ; ++mid_iter, ++hist_iter, ++backoff_out) {
if (hist_iter == context_rend) {
// Ran out of history. Typically no backoff, but this could be a blank.
@@ -192,7 +189,7 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search,
return ret;
}
- if (mid_iter == search_.middle.end()) break;
+ if (mid_iter == search_.MiddleEnd()) break;
float revert = ret.prob;
if (!search_.LookupMiddle(*mid_iter, *hist_iter, ret.prob, *backoff_out, node)) {
@@ -227,9 +224,9 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search,
return ret;
}
-template class GenericModel<ProbingHashedSearch, ProbingVocabulary>;
-template class GenericModel<SortedHashedSearch, SortedVocabulary>;
-template class GenericModel<trie::TrieSearch, SortedVocabulary>;
+template class GenericModel<ProbingHashedSearch, ProbingVocabulary>; // HASH_PROBING
+template class GenericModel<trie::TrieSearch<DontQuantize>, SortedVocabulary>; // TRIE_SORTED
+template class GenericModel<trie::TrieSearch<SeparatelyQuantize>, SortedVocabulary>; // TRIE_SORTED_QUANT
} // namespace detail
} // namespace ngram
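
The mechanical change in this file: search_.middle was a std::vector in the hashed search, but the templated trie needs a different middle element type per quantizer, so GenericModel now walks the middle orders through a MiddleBegin()/MiddleEnd() pointer range. A sketch of the contract a Search type must satisfy after this change (shapes assumed for illustration):

    #include <cstddef>

    struct MiddleSketch { /* per-order lookup state */ };

    // Any Search exposing its middle-order tables as a contiguous range
    // works, whether the storage is a std::vector or a fixed array.
    class SearchSketch {
     public:
      typedef MiddleSketch Middle;
      const Middle *MiddleBegin() const { return middle_; }
      const Middle *MiddleEnd() const { return middle_ + count_; }
     private:
      static const std::size_t kMaxMiddle = 4;  // hypothetical bound: orders 2..5
      Middle middle_[kMaxMiddle];
      std::size_t count_;
    };
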
diff --git a/klm/lm/model.hh b/klm/lm/model.hh
index b85ccdcc..1f49a382 100644
--- a/klm/lm/model.hh
+++ b/klm/lm/model.hh
@@ -5,6 +5,7 @@
#include "lm/config.hh"
#include "lm/facade.hh"
#include "lm/max_order.hh"
+#include "lm/quantize.hh"
#include "lm/search_hashed.hh"
#include "lm/search_trie.hh"
#include "lm/vocab.hh"
@@ -70,9 +71,10 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
private:
typedef base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> P;
public:
- // Get the size of memory that will be mapped given ngram counts. This
- // does not include small non-mapped control structures, such as this class
- // itself.
+ /* Get the size of memory that will be mapped given ngram counts. This
+ * does not include small non-mapped control structures, such as this class
+ * itself.
+ */
static size_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());
/* Load the model from a file. It may be an ARPA or binary file. Binary
@@ -111,6 +113,11 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
private:
friend void LoadLM<>(const char *file, const Config &config, GenericModel<Search, VocabularyT> &to);
+ static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config) {
+ AdvanceOrThrow(fd, VocabularyT::Size(counts[0], config));
+ Search::UpdateConfigFromBinary(fd, counts, config);
+ }
+
float SlowBackoffLookup(const WordIndex *const context_rbegin, const WordIndex *const context_rend, unsigned char start) const;
FullScoreReturn ScoreExceptBackoff(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
@@ -130,9 +137,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
VocabularyT vocab_;
- typedef typename Search::Unigram Unigram;
typedef typename Search::Middle Middle;
- typedef typename Search::Longest Longest;
Search search_;
};
@@ -141,13 +146,15 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
// These must also be instantiated in the cc file.
typedef ::lm::ngram::ProbingVocabulary Vocabulary;
-typedef detail::GenericModel<detail::ProbingHashedSearch, Vocabulary> ProbingModel;
+typedef detail::GenericModel<detail::ProbingHashedSearch, Vocabulary> ProbingModel; // HASH_PROBING
// Default implementation. No real reason for it to be the default.
typedef ProbingModel Model;
// Smaller implementation.
typedef ::lm::ngram::SortedVocabulary SortedVocabulary;
-typedef detail::GenericModel<trie::TrieSearch, SortedVocabulary> TrieModel;
+typedef detail::GenericModel<trie::TrieSearch<DontQuantize>, SortedVocabulary> TrieModel; // TRIE_SORTED
+
+typedef detail::GenericModel<trie::TrieSearch<SeparatelyQuantize>, SortedVocabulary> QuantTrieModel; // QUANT_TRIE_SORTED
} // namespace ngram
} // namespace lm
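
With the QuantTrieModel typedef in place, a quantized binary loads through the same facade API as the other model types. An illustrative use (the file name is hypothetical):

    #include "lm/model.hh"

    int main() {
      // Any binary built with -q loads here as QUANT_TRIE_SORTED.
      lm::ngram::QuantTrieModel model("quantized.mmap");
      lm::ngram::State out;
      // Score one word following the begin-of-sentence context.
      float logprob = model.Score(model.BeginSentenceState(),
                                  model.GetVocabulary().Index("the"), out);
      return logprob < 0.0f ? 0 : 1;
    }
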
diff --git a/klm/lm/model_test.cc b/klm/lm/model_test.cc
index 548c098d..8bf040ff 100644
--- a/klm/lm/model_test.cc
+++ b/klm/lm/model_test.cc
@@ -243,13 +243,14 @@ BOOST_AUTO_TEST_CASE(probing) {
LoadingTest<Model>();
}
-/*BOOST_AUTO_TEST_CASE(sorted) {
- LoadingTest<SortedModel>();
-}*/
BOOST_AUTO_TEST_CASE(trie) {
LoadingTest<TrieModel>();
}
+BOOST_AUTO_TEST_CASE(quant) {
+ LoadingTest<QuantTrieModel>();
+}
+
template <class ModelT> void BinaryTest() {
Config config;
config.write_mmap = "test.binary";
@@ -275,12 +276,12 @@ template <class ModelT> void BinaryTest() {
BOOST_AUTO_TEST_CASE(write_and_read_probing) {
BinaryTest<Model>();
}
-/*BOOST_AUTO_TEST_CASE(write_and_read_sorted) {
- BinaryTest<SortedModel>();
-}*/
BOOST_AUTO_TEST_CASE(write_and_read_trie) {
BinaryTest<TrieModel>();
}
+BOOST_AUTO_TEST_CASE(write_and_read_quant_trie) {
+ BinaryTest<QuantTrieModel>();
+}
} // namespace
} // namespace ngram
diff --git a/klm/lm/quantize.cc b/klm/lm/quantize.cc
new file mode 100644
index 00000000..4bb6b1b8
--- /dev/null
+++ b/klm/lm/quantize.cc