diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-29 17:21:28 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-29 17:21:28 +0000 |
commit | 7d3557c846bc51bbbaa04794690501dad6c3b27e (patch) | |
tree | 13d36d32eb6ee53dfec4fd733653555079439028 | |
parent | d6bc05e098a2db25fc1bb74fde4c27b093dcb230 (diff) |
isolate warning messages
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@56 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- | decoder/Makefile.am | 1 | ||||
-rw-r--r-- | decoder/dict.cc | 11 | ||||
-rw-r--r-- | decoder/dict.h | 8 | ||||
-rw-r--r-- | extools/mr_stripe_rule_reduce.cc | 1 | ||||
-rw-r--r-- | extools/sentence_pair.cc | 6 | ||||
-rwxr-xr-x | gi/pipeline/local-gi-pipeline.pl | 2 | ||||
-rw-r--r-- | gi/pyp-topics/src/Makefile.am | 4 | ||||
-rw-r--r-- | gi/pyp-topics/src/contexts_corpus.hh | 4 |
8 files changed, 24 insertions, 13 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 44d6adc8..fd4589e4 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -47,6 +47,7 @@ libcdec_a_SOURCES = \ rule_lexer.cc \ fst_translator.cc \ csplit.cc \ + dict.cc \ translator.cc \ scfg_translator.cc \ hg.cc \ diff --git a/decoder/dict.cc b/decoder/dict.cc new file mode 100644 index 00000000..485fa348 --- /dev/null +++ b/decoder/dict.cc @@ -0,0 +1,11 @@ +#include "dict.h" + +#include <string> +#include <vector> +#include <boost/regex.hpp> +#include <boost/algorithm/string/regex.hpp> + +void Dict::AsVector(const WordID& id, std::vector<std::string>* results) const { + boost::algorithm::split_regex(*results, Convert(id), boost::regex("\\|\\|\\|")); +} + diff --git a/decoder/dict.h b/decoder/dict.h index 39baf6ed..1c8ebb67 100644 --- a/decoder/dict.h +++ b/decoder/dict.h @@ -8,8 +8,6 @@ #include <vector> #include <boost/functional/hash.hpp> -#include <boost/regex.hpp> -#include <boost/algorithm/string/regex.hpp> #include "wordid.h" @@ -51,11 +49,7 @@ class Dict { return words_[id-1]; } - inline std::vector<std::string> AsVector(const WordID& id) const { - std::vector<std::string> result; - boost::algorithm::split_regex(result, Convert(id), boost::regex("\\|\\|\\|")); - return result; - } + void AsVector(const WordID& id, std::vector<std::string>* results) const; void clear() { words_.clear(); d_.clear(); } diff --git a/extools/mr_stripe_rule_reduce.cc b/extools/mr_stripe_rule_reduce.cc index eaf1b6d7..902b6a07 100644 --- a/extools/mr_stripe_rule_reduce.cc +++ b/extools/mr_stripe_rule_reduce.cc @@ -73,6 +73,7 @@ int ReadPhraseUntilDividerOrEnd(const char* buf, const int sstart, const int end if (w == kDIV) return ptr; p->push_back(w); } + assert(p->size() > 0); return ptr; } diff --git a/extools/sentence_pair.cc b/extools/sentence_pair.cc index 91286059..5706398f 100644 --- a/extools/sentence_pair.cc +++ b/extools/sentence_pair.cc @@ -84,8 +84,10 @@ int AnnotatedParallelSentence::ReadAlignmentPoint(const char* buf, void AnnotatedParallelSentence::ParseAlignmentPoint(const char* buf, int start, int end) { short a, b; ReadAlignmentPoint(buf, start, end, false, &a, &b); - assert(a < f_len); - assert(b < e_len); + if (a >= f_len || b >= e_len) { + cerr << "(" << a << ',' << b << ") is out of bounds. INPUT=\n" << buf << endl; + exit(1); + } aligned(a,b) = true; ++f_aligned[a]; ++e_aligned[b]; diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl index be91f9ad..27d2047c 100755 --- a/gi/pipeline/local-gi-pipeline.pl +++ b/gi/pipeline/local-gi-pipeline.pl @@ -8,7 +8,7 @@ my $GZIP = 'gzip'; my $ZCAT = 'gunzip -c'; my $BASE_PHRASE_MAX_SIZE = 10; my $COMPLETE_CACHE = 1; -my $ITEMS_IN_MEMORY = 3000000; # cache size in extractors +my $ITEMS_IN_MEMORY = 10000000; # cache size in extractors my $NUM_TOPICS = 50; my $NUM_SAMPLES = 100; my $CONTEXT_SIZE = 1; diff --git a/gi/pyp-topics/src/Makefile.am b/gi/pyp-topics/src/Makefile.am index 7ca269a5..e4c4c1b9 100644 --- a/gi/pyp-topics/src/Makefile.am +++ b/gi/pyp-topics/src/Makefile.am @@ -4,10 +4,10 @@ contexts_lexer.cc: contexts_lexer.l $(LEX) -s -CF -8 -o$@ $< pyp_topics_train_SOURCES = corpus.cc gammadist.c gzstream.cc mt19937ar.c pyp-topics.cc train.cc contexts_lexer.cc contexts_corpus.cc -pyp_topics_train_LDADD = -lz +pyp_topics_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz pyp_contexts_train_SOURCES = corpus.cc gammadist.c gzstream.cc mt19937ar.c pyp-topics.cc contexts_lexer.cc contexts_corpus.cc train-contexts.cc -pyp_contexts_train_LDADD = -lz +pyp_contexts_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh index 9614e7e3..a55e52f2 100644 --- a/gi/pyp-topics/src/contexts_corpus.hh +++ b/gi/pyp-topics/src/contexts_corpus.hh @@ -60,7 +60,9 @@ public: } std::vector<std::string> context2string(const WordID& id) const { - return m_dict.AsVector(id); + std::vector<std::string> res; + m_dict.AsVector(id, &res); + return res; } const std::string& key(const int& i) const { |