isolate warning messages

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@56 ec762483-ff6d-05da-a07a-a48fb63a330f
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-29 17:21:28 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-29 17:21:28 +0000
commit: 1a1f3094010bd85acbfefe64743ebde04f0b41a4 (patch)
tree: 18a740aac72eee3eef70d3b3788b8bca121424ad
parent: 321b5cfe9ce2a84e62cdd185b55cbac5c133b549 (diff)
8 files changed, 24 insertions, 13 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 44d6adc8..fd4589e4 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -47,6 +47,7 @@ libcdec_a_SOURCES = \
   rule_lexer.cc \
   fst_translator.cc \
   csplit.cc \
+  dict.cc \
   translator.cc \
   scfg_translator.cc \
   hg.cc \
diff --git a/decoder/dict.cc b/decoder/dict.cc
new file mode 100644
index 00000000..485fa348
--- /dev/null
+++ b/decoder/dict.cc
@@ -0,0 +1,11 @@
+#include "dict.h"
+
+#include <string>
+#include <vector>
+#include <boost/regex.hpp>
+#include <boost/algorithm/string/regex.hpp>
+
+void Dict::AsVector(const WordID& id, std::vector<std::string>* results) const {
+  boost::algorithm::split_regex(*results, Convert(id), boost::regex("\\|\\|\\|"));
+}
+
diff --git a/decoder/dict.h b/decoder/dict.h
index 39baf6ed..1c8ebb67 100644
--- a/decoder/dict.h
+++ b/decoder/dict.h
@@ -8,8 +8,6 @@
 #include <vector>
 
 #include <boost/functional/hash.hpp>
-#include <boost/regex.hpp>
-#include <boost/algorithm/string/regex.hpp>
 
 #include "wordid.h"
 
@@ -51,11 +49,7 @@ class Dict {
     return words_[id-1];
   }
 
-  inline std::vector<std::string> AsVector(const WordID& id) const {
-    std::vector<std::string> result;
-    boost::algorithm::split_regex(result, Convert(id), boost::regex("\\|\\|\\|"));
-    return result;
-  }
+  void AsVector(const WordID& id, std::vector<std::string>* results) const;
 
   void clear() { words_.clear(); d_.clear(); }
 
diff --git a/extools/mr_stripe_rule_reduce.cc b/extools/mr_stripe_rule_reduce.cc
index eaf1b6d7..902b6a07 100644
--- a/extools/mr_stripe_rule_reduce.cc
+++ b/extools/mr_stripe_rule_reduce.cc
@@ -73,6 +73,7 @@ int ReadPhraseUntilDividerOrEnd(const char* buf, const int sstart, const int end
     if (w == kDIV) return ptr;
     p->push_back(w);
   }
+  assert(p->size() > 0);
   return ptr;
 }
 
diff --git a/extools/sentence_pair.cc b/extools/sentence_pair.cc
index 91286059..5706398f 100644
--- a/extools/sentence_pair.cc
+++ b/extools/sentence_pair.cc
@@ -84,8 +84,10 @@ int AnnotatedParallelSentence::ReadAlignmentPoint(const char* buf,
 void AnnotatedParallelSentence::ParseAlignmentPoint(const char* buf, int start, int end) {
   short a, b;
   ReadAlignmentPoint(buf, start, end, false, &a, &b);
-  assert(a < f_len);
-  assert(b < e_len);
+  if (a >= f_len || b >= e_len) {
+    cerr << "(" << a << ',' << b << ") is out of bounds. INPUT=\n" << buf << endl;
+    exit(1);
+  }
   aligned(a,b) = true;
   ++f_aligned[a];
   ++e_aligned[b];
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index be91f9ad..27d2047c 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -8,7 +8,7 @@ my $GZIP = 'gzip';
 my $ZCAT = 'gunzip -c';
 my $BASE_PHRASE_MAX_SIZE = 10;
 my $COMPLETE_CACHE = 1;
-my $ITEMS_IN_MEMORY = 3000000;  # cache size in extractors
+my $ITEMS_IN_MEMORY = 10000000;  # cache size in extractors
 my $NUM_TOPICS = 50;
 my $NUM_SAMPLES = 100;
 my $CONTEXT_SIZE = 1;
diff --git a/gi/pyp-topics/src/Makefile.am b/gi/pyp-topics/src/Makefile.am
index 7ca269a5..e4c4c1b9 100644
--- a/gi/pyp-topics/src/Makefile.am
+++ b/gi/pyp-topics/src/Makefile.am
@@ -4,10 +4,10 @@ contexts_lexer.cc: contexts_lexer.l
 	$(LEX) -s -CF -8 -o$@ $<
 
 pyp_topics_train_SOURCES = corpus.cc gammadist.c gzstream.cc mt19937ar.c pyp-topics.cc train.cc contexts_lexer.cc contexts_corpus.cc
-pyp_topics_train_LDADD = -lz
+pyp_topics_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
 
 pyp_contexts_train_SOURCES = corpus.cc gammadist.c gzstream.cc mt19937ar.c pyp-topics.cc contexts_lexer.cc contexts_corpus.cc train-contexts.cc
-pyp_contexts_train_LDADD = -lz
+pyp_contexts_train_LDADD = $(top_srcdir)/decoder/libcdec.a -lz
 
 AM_CPPFLAGS = -W -Wall -Wno-sign-compare -funroll-loops
 
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh
index 9614e7e3..a55e52f2 100644
--- a/gi/pyp-topics/src/contexts_corpus.hh
+++ b/gi/pyp-topics/src/contexts_corpus.hh
@@ -60,7 +60,9 @@ public:
     }
 
     std::vector<std::string> context2string(const WordID& id) const {
-      return m_dict.AsVector(id);
+      std::vector<std::string> res;
+      m_dict.AsVector(id, &res);
+      return res;
     }
 
     const std::string& key(const int& i) const {
author	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-06-29 17:21:28 +0000
committer	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-06-29 17:21:28 +0000
commit	1a1f3094010bd85acbfefe64743ebde04f0b41a4 (patch)
tree	18a740aac72eee3eef70d3b3788b8bca121424ad
parent	321b5cfe9ce2a84e62cdd185b55cbac5c133b549 (diff)