summary | refs | log | tree | commit | diff
path: root/klm/lm/filter
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2013-01-22 21:37:49 +0000
committer Kenneth Heafield <github@kheafield.com> 2013-01-22 21:37:49 +0000
commit b35a7f3a96ff8ae42e15922dd6949bf9f5d15501 (patch)
tree 26edbe78931ffc50864a899c087d851005fe560b /klm/lm/filter
parent 51a412aa7f5f50035cf28a274a70508c839f3d40 (diff)
KenLM 58da338b
Diffstat (limited to 'klm/lm/filter')
-rw-r--r-- klm/lm/filter/filter_main.cc (renamed from klm/lm/filter/main.cc) 19
-rw-r--r-- klm/lm/filter/phrase.hh 1
-rw-r--r-- klm/lm/filter/vocab.hh 1
3 files changed, 11 insertions, 10 deletions
diff --git a/klm/lm/filter/main.cc b/klm/lm/filter/filter_main.cc
index c42243e2..1a4ba84f 100644
--- a/klm/lm/filter/main.cc
+++ b/klm/lm/filter/filter_main.cc
@@ -53,7 +53,7 @@ void DisplayHelp(const char *name) {
" stream i.e. /dev/stdout\n";
}
-typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION} FilterMode;
+typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} FilterMode;
typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format;
struct Config {
@@ -162,19 +162,19 @@ int main(int argc, char *argv[]) {
return 1;
}
- // I used to have boost::program_options, but some users didn't want to compile boost.
+ // I used to have boost::program_options, but some users didn't want to compile boost.
lm::Config config;
- boost::optional<lm::FilterMode> mode;
+ config.mode = lm::MODE_UNSET;
for (int i = 1; i < argc - 2; ++i) {
const char *str = argv[i];
if (!std::strcmp(str, "copy")) {
- mode = lm::MODE_COPY;
+ config.mode = lm::MODE_COPY;
} else if (!std::strcmp(str, "single")) {
- mode = lm::MODE_SINGLE;
+ config.mode = lm::MODE_SINGLE;
} else if (!std::strcmp(str, "multiple")) {
- mode = lm::MODE_MULTIPLE;
+ config.mode = lm::MODE_MULTIPLE;
} else if (!std::strcmp(str, "union")) {
- mode = lm::MODE_UNION;
+ config.mode = lm::MODE_UNION;
} else if (!std::strcmp(str, "phrase")) {
config.phrase = true;
} else if (!std::strcmp(str, "context")) {
@@ -203,13 +203,12 @@ int main(int argc, char *argv[]) {
}
}
- if (!mode) {
+ if (config.mode == lm::MODE_UNSET) {
lm::DisplayHelp(argv[0]);
return 1;
}
- config.mode = *mode;
- if (config.phrase && config.mode != lm::MODE_UNION && mode != lm::MODE_MULTIPLE) {
+ if (config.phrase && config.mode != lm::MODE_UNION && config.mode != lm::MODE_MULTIPLE) {
std::cerr << "Phrase constraint currently only works in multiple or union mode. If you really need it for single, put everything on one line and use union." << std::endl;
return 1;
}
diff --git a/klm/lm/filter/phrase.hh b/klm/lm/filter/phrase.hh
index 07479dea..b4edff41 100644
--- a/klm/lm/filter/phrase.hh
+++ b/klm/lm/filter/phrase.hh
@@ -57,6 +57,7 @@ class Substrings {
LM_FILTER_PHRASE_METHOD(Right, right)
LM_FILTER_PHRASE_METHOD(Phrase, phrase)
+#pragma GCC diagnostic ignored "-Wuninitialized" // end != finish so there's always an initialization
// sentence_id must be non-decreasing. Iterators are over words in the phrase.
template <class Iterator> void AddPhrase(unsigned int sentence_id, const Iterator &begin, const Iterator &end) {
// Iterate over all substrings.
diff --git a/klm/lm/filter/vocab.hh b/klm/lm/filter/vocab.hh
index e2b6adff..7f0fadaa 100644
--- a/klm/lm/filter/vocab.hh
+++ b/klm/lm/filter/vocab.hh
@@ -5,6 +5,7 @@
#include "util/multi_intersection.hh"
#include "util/string_piece.hh"
+#include "util/string_piece_hash.hh"
#include "util/tokenize_piece.hh"
#include <boost/noncopyable.hpp>