From 7d3557c846bc51bbbaa04794690501dad6c3b27e Mon Sep 17 00:00:00 2001 From: redpony Date: Tue, 29 Jun 2010 17:21:28 +0000 Subject: isolate warning messages git-svn-id: https://ws10smt.googlecode.com/svn/trunk@56 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/Makefile.am | 1 + decoder/dict.cc | 11 +++++++++++ decoder/dict.h | 8 +------- extools/mr_stripe_rule_reduce.cc | 1 + extools/sentence_pair.cc | 6 ++++-- gi/pipeline/local-gi-pipeline.pl | 2 +- gi/pyp-topics/src/Makefile.am | 4 ++-- gi/pyp-topics/src/contexts_corpus.hh | 4 +++- 8 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 decoder/dict.cc diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 44d6adc8..fd4589e4 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -47,6 +47,7 @@ libcdec_a_SOURCES = \ rule_lexer.cc \ fst_translator.cc \ csplit.cc \ + dict.cc \ translator.cc \ scfg_translator.cc \ hg.cc \ diff --git a/decoder/dict.cc b/decoder/dict.cc new file mode 100644 index 00000000..485fa348 --- /dev/null +++ b/decoder/dict.cc @@ -0,0 +1,11 @@ +#include "dict.h" + +#include +#include +#include +#include + +void Dict::AsVector(const WordID& id, std::vector<std::string>* results) const { + boost::algorithm::split_regex(*results, Convert(id), boost::regex("\\|\\|\\|")); +} + diff --git a/decoder/dict.h b/decoder/dict.h index 39baf6ed..1c8ebb67 100644 --- a/decoder/dict.h +++ b/decoder/dict.h @@ -8,8 +8,6 @@ #include #include -#include -#include #include "wordid.h" @@ -51,11 +49,7 @@ class Dict { return words_[id-1]; } - inline std::vector<std::string> AsVector(const WordID& id) const { - std::vector<std::string> result; - boost::algorithm::split_regex(result, Convert(id), boost::regex("\\|\\|\\|")); - return result; - } + void AsVector(const WordID& id, std::vector<std::string>* results) const; void clear() { words_.clear(); d_.clear(); } diff --git a/extools/mr_stripe_rule_reduce.cc b/extools/mr_stripe_rule_reduce.cc index eaf1b6d7..902b6a07 100644 --- a/extools/mr_stripe_rule_reduce.cc +++ 
b/extools/mr_stripe_rule_reduce.cc @@ -73,6 +73,7 @@ int ReadPhraseUntilDividerOrEnd(const char* buf, const int sstart, const int end if (w == kDIV) return ptr; p->push_back(w); } + assert(p->size() > 0); return ptr; } diff --git a/extools/sentence_pair.cc b/extools/sentence_pair.cc index 91286059..5706398f 100644 --- a/extools/sentence_pair.cc +++ b/extools/sentence_pair.cc @@ -84,8 +84,10 @@ int AnnotatedParallelSentence::ReadAlignmentPoint(const char* buf, void AnnotatedParallelSentence::ParseAlignmentPoint(const char* buf, int start, int end) { short a, b; ReadAlignmentPoint(buf, start, end, false, &a, &b); - assert(a < f_len); - assert(b < e_len); + if (a >= f_len || b >= e_len) { + cerr << "(" << a << ',' << b << ") is out of bounds. INPUT=\n" << buf << endl; + exit(1); + } aligned(a,b) = true; ++f_aligned[a]; ++e_aligned[b]; diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl index be91f9ad..27d2047c 100755 --- a/gi/pipeline/local-gi-pipeline.pl +++ b/gi/pipeline/local-gi-pipeline.pl @@ -8,7 +8,7 @@ my $GZIP = 'gzip'; my $ZCAT = 'gunzip -c'; my $BASE_PHRASE_MAX_SIZE = 10; my $COMPLETE_CACHE = 1; -my $ITEMS_IN_MEMORY = 3000000; # cache size in extractors +my $ITEMS_IN_MEMORY = 10000000; # cache size in extractors my $NUM_TOPICS = 50; my $NUM_SAMPLES = 100; my $CONTEXT_SIZE = 1; diff --git a/gi/pyp-topics/src/Makefile.am b/gi/pyp-topics/src/Makefile.am index 7ca269a5..e4c4c1b9 100644 --- a/gi/pyp-topics/src/Makefile.am +++ b/gi/pyp-topics/src/Makefile.am @@ -4,10 +4,10 @@ contexts_lexer.cc: contexts_lexer.l $(LEX) -s -CF -8 -o$@ $< pyp_topics_train_SOURCES = corpus.cc gammadist.c gzstream.cc mt19937ar.c pyp-topics.cc train.cc contexts_lexer.cc contexts_corpus.cc -pyp_topics_train_LDADD = -lz +pyp_topics_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz pyp_contexts_train_SOURCES = corpus.cc gammadist.c gzstream.cc mt19937ar.c pyp-topics.cc contexts_lexer.cc contexts_corpus.cc train-contexts.cc -pyp_contexts_train_LDADD = -lz 
+pyp_contexts_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh index 9614e7e3..a55e52f2 100644 --- a/gi/pyp-topics/src/contexts_corpus.hh +++ b/gi/pyp-topics/src/contexts_corpus.hh @@ -60,7 +60,9 @@ public: } std::vector<std::string> context2string(const WordID& id) const { - return m_dict.AsVector(id); + std::vector<std::string> res; + m_dict.AsVector(id, &res); + return res; } const std::string& key(const int& i) const { -- cgit v1.2.3