summary | refs | log | tree | commit | diff
path: root/klm/lm
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2013-01-22 21:37:49 +0000
committer Kenneth Heafield <github@kheafield.com> 2013-01-22 21:37:49 +0000
commit 516c132fb683b5bf77ae3230a1b3709beb57618e (patch)
tree d37fb5d1d8f4273dac3509291495ff9797c399e0 /klm/lm
parent 53532304714256f692fd5f7305b2fab10a7d7cca (diff)
KenLM 58da338b
Diffstat (limited to 'klm/lm')
-rw-r--r-- klm/lm/Makefile.am 4
-rw-r--r-- klm/lm/build_binary_main.cc (renamed from klm/lm/build_binary.cc) 0
-rw-r--r-- klm/lm/builder/Makefile.am 2
-rw-r--r-- klm/lm/builder/discount.hh 2
-rw-r--r-- klm/lm/builder/lmplz_main.cc (renamed from klm/lm/builder/main.cc) 0
-rw-r--r-- klm/lm/filter/filter_main.cc (renamed from klm/lm/filter/main.cc) 19
-rw-r--r-- klm/lm/filter/phrase.hh 1
-rw-r--r-- klm/lm/filter/vocab.hh 1
-rw-r--r-- klm/lm/fragment_main.cc (renamed from klm/lm/fragment.cc) 0
-rw-r--r-- klm/lm/kenlm_max_order_main.cc (renamed from klm/lm/max_order.cc) 0
-rw-r--r-- klm/lm/query_main.cc (renamed from klm/lm/ngram_query.cc) 0
11 files changed, 15 insertions, 14 deletions
diff --git a/klm/lm/Makefile.am b/klm/lm/Makefile.am
index 45f40c43..48b0ba34 100644
--- a/klm/lm/Makefile.am
+++ b/klm/lm/Makefile.am
@@ -1,9 +1,9 @@
bin_PROGRAMS = build_binary ngram_query
-build_binary_SOURCES = build_binary.cc
+build_binary_SOURCES = build_binary_main.cc
build_binary_LDADD = libklm.a ../util/libklm_util.a ../util/double-conversion/libklm_util_double.a -lz
-ngram_query_SOURCES = ngram_query.cc
+ngram_query_SOURCES = query_main.cc
ngram_query_LDADD = libklm.a ../util/libklm_util.a ../util/double-conversion/libklm_util_double.a -lz
#noinst_PROGRAMS = \
diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary_main.cc
index ab2c0c32..ab2c0c32 100644
--- a/klm/lm/build_binary.cc
+++ b/klm/lm/build_binary_main.cc
diff --git a/klm/lm/builder/Makefile.am b/klm/lm/builder/Makefile.am
index b5c147fd..317e03ce 100644
--- a/klm/lm/builder/Makefile.am
+++ b/klm/lm/builder/Makefile.am
@@ -1,7 +1,7 @@
bin_PROGRAMS = builder
builder_SOURCES = \
- main.cc \
+ lmplz_main.cc \
adjust_counts.cc \
adjust_counts.hh \
corpus_count.cc \
diff --git a/klm/lm/builder/discount.hh b/klm/lm/builder/discount.hh
index 754fb20d..4d0aa4fd 100644
--- a/klm/lm/builder/discount.hh
+++ b/klm/lm/builder/discount.hh
@@ -3,7 +3,7 @@
#include <algorithm>
-#include <inttypes.h>
+#include <stdint.h>
namespace lm {
namespace builder {
diff --git a/klm/lm/builder/main.cc b/klm/lm/builder/lmplz_main.cc
index 90b9dca2..90b9dca2 100644
--- a/klm/lm/builder/main.cc
+++ b/klm/lm/builder/lmplz_main.cc
diff --git a/klm/lm/filter/main.cc b/klm/lm/filter/filter_main.cc
index c42243e2..1a4ba84f 100644
--- a/klm/lm/filter/main.cc
+++ b/klm/lm/filter/filter_main.cc
@@ -53,7 +53,7 @@ void DisplayHelp(const char *name) {
" stream i.e. /dev/stdout\n";
}
-typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION} FilterMode;
+typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} FilterMode;
typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format;
struct Config {
@@ -162,19 +162,19 @@ int main(int argc, char *argv[]) {
return 1;
}
- // I used to have boost::program_options, but some users didn't want to compile boost.
+ // I used to have boost::program_options, but some users didn't want to compile boost.
lm::Config config;
- boost::optional<lm::FilterMode> mode;
+ config.mode = lm::MODE_UNSET;
for (int i = 1; i < argc - 2; ++i) {
const char *str = argv[i];
if (!std::strcmp(str, "copy")) {
- mode = lm::MODE_COPY;
+ config.mode = lm::MODE_COPY;
} else if (!std::strcmp(str, "single")) {
- mode = lm::MODE_SINGLE;
+ config.mode = lm::MODE_SINGLE;
} else if (!std::strcmp(str, "multiple")) {
- mode = lm::MODE_MULTIPLE;
+ config.mode = lm::MODE_MULTIPLE;
} else if (!std::strcmp(str, "union")) {
- mode = lm::MODE_UNION;
+ config.mode = lm::MODE_UNION;
} else if (!std::strcmp(str, "phrase")) {
config.phrase = true;
} else if (!std::strcmp(str, "context")) {
@@ -203,13 +203,12 @@ int main(int argc, char *argv[]) {
}
}
- if (!mode) {
+ if (config.mode == lm::MODE_UNSET) {
lm::DisplayHelp(argv[0]);
return 1;
}
- config.mode = *mode;
- if (config.phrase && config.mode != lm::MODE_UNION && mode != lm::MODE_MULTIPLE) {
+ if (config.phrase && config.mode != lm::MODE_UNION && config.mode != lm::MODE_MULTIPLE) {
std::cerr << "Phrase constraint currently only works in multiple or union mode. If you really need it for single, put everything on one line and use union." << std::endl;
return 1;
}
diff --git a/klm/lm/filter/phrase.hh b/klm/lm/filter/phrase.hh
index 07479dea..b4edff41 100644
--- a/klm/lm/filter/phrase.hh
+++ b/klm/lm/filter/phrase.hh
@@ -57,6 +57,7 @@ class Substrings {
LM_FILTER_PHRASE_METHOD(Right, right)
LM_FILTER_PHRASE_METHOD(Phrase, phrase)
+#pragma GCC diagnostic ignored "-Wuninitialized" // end != finish so there's always an initialization
// sentence_id must be non-decreasing. Iterators are over words in the phrase.
template <class Iterator> void AddPhrase(unsigned int sentence_id, const Iterator &begin, const Iterator &end) {
// Iterate over all substrings.
diff --git a/klm/lm/filter/vocab.hh b/klm/lm/filter/vocab.hh
index e2b6adff..7f0fadaa 100644
--- a/klm/lm/filter/vocab.hh
+++ b/klm/lm/filter/vocab.hh
@@ -5,6 +5,7 @@
#include "util/multi_intersection.hh"
#include "util/string_piece.hh"
+#include "util/string_piece_hash.hh"
#include "util/tokenize_piece.hh"
#include <boost/noncopyable.hpp>
diff --git a/klm/lm/fragment.cc b/klm/lm/fragment_main.cc
index 0267cd4e..0267cd4e 100644
--- a/klm/lm/fragment.cc
+++ b/klm/lm/fragment_main.cc
diff --git a/klm/lm/max_order.cc b/klm/lm/kenlm_max_order_main.cc
index 94221201..94221201 100644
--- a/klm/lm/max_order.cc
+++ b/klm/lm/kenlm_max_order_main.cc
diff --git a/klm/lm/ngram_query.cc b/klm/lm/query_main.cc
index 49757d9a..49757d9a 100644
--- a/klm/lm/ngram_query.cc
+++ b/klm/lm/query_main.cc