diff options
author | Kenneth Heafield <github@kheafield.com> | 2013-01-22 21:37:49 +0000 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2013-01-22 21:37:49 +0000 |
commit | 516c132fb683b5bf77ae3230a1b3709beb57618e (patch) | |
tree | d37fb5d1d8f4273dac3509291495ff9797c399e0 /klm/lm | |
parent | 53532304714256f692fd5f7305b2fab10a7d7cca (diff) |
KenLM 58da338b
Diffstat (limited to 'klm/lm')
-rw-r--r-- | klm/lm/Makefile.am | 4 | ||||
-rw-r--r-- | klm/lm/build_binary_main.cc (renamed from klm/lm/build_binary.cc) | 0 | ||||
-rw-r--r-- | klm/lm/builder/Makefile.am | 2 | ||||
-rw-r--r-- | klm/lm/builder/discount.hh | 2 | ||||
-rw-r--r-- | klm/lm/builder/lmplz_main.cc (renamed from klm/lm/builder/main.cc) | 0 | ||||
-rw-r--r-- | klm/lm/filter/filter_main.cc (renamed from klm/lm/filter/main.cc) | 19 | ||||
-rw-r--r-- | klm/lm/filter/phrase.hh | 1 | ||||
-rw-r--r-- | klm/lm/filter/vocab.hh | 1 | ||||
-rw-r--r-- | klm/lm/fragment_main.cc (renamed from klm/lm/fragment.cc) | 0 | ||||
-rw-r--r-- | klm/lm/kenlm_max_order_main.cc (renamed from klm/lm/max_order.cc) | 0 | ||||
-rw-r--r-- | klm/lm/query_main.cc (renamed from klm/lm/ngram_query.cc) | 0 |
11 files changed, 15 insertions, 14 deletions
diff --git a/klm/lm/Makefile.am b/klm/lm/Makefile.am index 45f40c43..48b0ba34 100644 --- a/klm/lm/Makefile.am +++ b/klm/lm/Makefile.am @@ -1,9 +1,9 @@ bin_PROGRAMS = build_binary ngram_query -build_binary_SOURCES = build_binary.cc +build_binary_SOURCES = build_binary_main.cc build_binary_LDADD = libklm.a ../util/libklm_util.a ../util/double-conversion/libklm_util_double.a -lz -ngram_query_SOURCES = ngram_query.cc +ngram_query_SOURCES = query_main.cc ngram_query_LDADD = libklm.a ../util/libklm_util.a ../util/double-conversion/libklm_util_double.a -lz #noinst_PROGRAMS = \ diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary_main.cc index ab2c0c32..ab2c0c32 100644 --- a/klm/lm/build_binary.cc +++ b/klm/lm/build_binary_main.cc diff --git a/klm/lm/builder/Makefile.am b/klm/lm/builder/Makefile.am index b5c147fd..317e03ce 100644 --- a/klm/lm/builder/Makefile.am +++ b/klm/lm/builder/Makefile.am @@ -1,7 +1,7 @@ bin_PROGRAMS = builder builder_SOURCES = \ - main.cc \ + lmplz_main.cc \ adjust_counts.cc \ adjust_counts.hh \ corpus_count.cc \ diff --git a/klm/lm/builder/discount.hh b/klm/lm/builder/discount.hh index 754fb20d..4d0aa4fd 100644 --- a/klm/lm/builder/discount.hh +++ b/klm/lm/builder/discount.hh @@ -3,7 +3,7 @@ #include <algorithm> -#include <inttypes.h> +#include <stdint.h> namespace lm { namespace builder { diff --git a/klm/lm/builder/main.cc b/klm/lm/builder/lmplz_main.cc index 90b9dca2..90b9dca2 100644 --- a/klm/lm/builder/main.cc +++ b/klm/lm/builder/lmplz_main.cc diff --git a/klm/lm/filter/main.cc b/klm/lm/filter/filter_main.cc index c42243e2..1a4ba84f 100644 --- a/klm/lm/filter/main.cc +++ b/klm/lm/filter/filter_main.cc @@ -53,7 +53,7 @@ void DisplayHelp(const char *name) { " stream i.e. /dev/stdout\n"; } -typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION} FilterMode; +typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} FilterMode; typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format; struct Config { @@ -162,19 +162,19 @@ int main(int argc, char *argv[]) { return 1; } - // I used to have boost::program_options, but some users didn't want to compile boost. + // I used to have boost::program_options, but some users didn't want to compile boost. lm::Config config; - boost::optional<lm::FilterMode> mode; + config.mode = lm::MODE_UNSET; for (int i = 1; i < argc - 2; ++i) { const char *str = argv[i]; if (!std::strcmp(str, "copy")) { - mode = lm::MODE_COPY; + config.mode = lm::MODE_COPY; } else if (!std::strcmp(str, "single")) { - mode = lm::MODE_SINGLE; + config.mode = lm::MODE_SINGLE; } else if (!std::strcmp(str, "multiple")) { - mode = lm::MODE_MULTIPLE; + config.mode = lm::MODE_MULTIPLE; } else if (!std::strcmp(str, "union")) { - mode = lm::MODE_UNION; + config.mode = lm::MODE_UNION; } else if (!std::strcmp(str, "phrase")) { config.phrase = true; } else if (!std::strcmp(str, "context")) { @@ -203,13 +203,12 @@ int main(int argc, char *argv[]) { } } - if (!mode) { + if (config.mode == lm::MODE_UNSET) { lm::DisplayHelp(argv[0]); return 1; } - config.mode = *mode; - if (config.phrase && config.mode != lm::MODE_UNION && mode != lm::MODE_MULTIPLE) { + if (config.phrase && config.mode != lm::MODE_UNION && config.mode != lm::MODE_MULTIPLE) { std::cerr << "Phrase constraint currently only works in multiple or union mode. If you really need it for single, put everything on one line and use union." << std::endl; return 1; } diff --git a/klm/lm/filter/phrase.hh b/klm/lm/filter/phrase.hh index 07479dea..b4edff41 100644 --- a/klm/lm/filter/phrase.hh +++ b/klm/lm/filter/phrase.hh @@ -57,6 +57,7 @@ class Substrings { LM_FILTER_PHRASE_METHOD(Right, right) LM_FILTER_PHRASE_METHOD(Phrase, phrase) +#pragma GCC diagnostic ignored "-Wuninitialized" // end != finish so there's always an initialization // sentence_id must be non-decreasing. Iterators are over words in the phrase. template <class Iterator> void AddPhrase(unsigned int sentence_id, const Iterator &begin, const Iterator &end) { // Iterate over all substrings. diff --git a/klm/lm/filter/vocab.hh b/klm/lm/filter/vocab.hh index e2b6adff..7f0fadaa 100644 --- a/klm/lm/filter/vocab.hh +++ b/klm/lm/filter/vocab.hh @@ -5,6 +5,7 @@ #include "util/multi_intersection.hh" #include "util/string_piece.hh" +#include "util/string_piece_hash.hh" #include "util/tokenize_piece.hh" #include <boost/noncopyable.hpp> diff --git a/klm/lm/fragment.cc b/klm/lm/fragment_main.cc index 0267cd4e..0267cd4e 100644 --- a/klm/lm/fragment.cc +++ b/klm/lm/fragment_main.cc diff --git a/klm/lm/max_order.cc b/klm/lm/kenlm_max_order_main.cc index 94221201..94221201 100644 --- a/klm/lm/max_order.cc +++ b/klm/lm/kenlm_max_order_main.cc diff --git a/klm/lm/ngram_query.cc b/klm/lm/query_main.cc index 49757d9a..49757d9a 100644 --- a/klm/lm/ngram_query.cc +++ b/klm/lm/query_main.cc |