summary | refs | log | tree | commit | diff
path: root/decoder/tdict.cc
diff options
context:
space:
mode:
author graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-20 22:44:40 +0000
committer graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-20 22:44:40 +0000
commit 0720de0bee526e8e9b311bb91d0a3a1efa8c1438 (patch)
tree 10f4f50fbaff6c8ecec0c68a72a31c3aa0d3d2a9 /decoder/tdict.cc
parent ac98d8465d9b7a6faf2b51bcd18260375842f6c8 (diff)
beautiful (?) low-level tokenization to fix valgrind problem with previous TD string->Sentence
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@345 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/tdict.cc')
-rw-r--r-- decoder/tdict.cc 44
1 files changed, 26 insertions, 18 deletions
diff --git a/decoder/tdict.cc b/decoder/tdict.cc
index 93f7b0eb..d7fc7eb7 100644
--- a/decoder/tdict.cc
+++ b/decoder/tdict.cc
@@ -1,7 +1,9 @@
+#include <sstream>
#include "Ngram.h"
#include "dict.h"
#include "tdict.h"
#include "Vocab.h"
+#include "stringlib.h"
using namespace std;
@@ -16,6 +18,10 @@ WordID TD::Convert(const std::string& s) {
return dict_.addWord((VocabString)s.c_str());
}
+WordID TD::Convert(char const* s) { // C-string overload: forwards s to dict_.addWord and returns the resulting id
+ return dict_.addWord((VocabString)s);
+}
+
const char* TD::Convert(const WordID& w) { // id -> word: returns whatever dict_.getWord yields for w
return dict_.getWord((VocabIndex)w);
}
@@ -31,25 +37,27 @@ void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>
}
std::string TD::GetString(const std::vector<WordID>& str) {
- string res;
- for (vector<WordID>::const_iterator i = str.begin(); i != str.end(); ++i)
- res += (i == str.begin() ? empty : space) + TD::Convert(*i);
- return res;
+ ostringstream o; // accumulates the words for the ids in str, joined by single spaces
+ for (vector<WordID>::const_iterator i = str.begin(); i != str.end(); ++i) { // iterator walk avoids comparing a signed int against the unsigned size()
+ if (i != str.begin()) o << ' '; // separator goes before every word except the first
+ o << TD::Convert(*i);
+ }
+ return o.str(); // empty string when str is empty
}
-void TD::ConvertSentence(const std::string& sent, std::vector<WordID>* ids) {
- string s = sent;
- int last = 0;
- ids->clear();
- for (int i=0; i < s.size(); ++i)
- if (s[i] == 32 || s[i] == '\t') {
- s[i]=0;
- if (last != i) {
- ids->push_back(Convert(&s[last]));
- }
- last = i + 1;
- }
- if (last != s.size())
- ids->push_back(Convert(&s[last]));
+namespace {
+struct add_wordids { // token callback: converts each C string it is called with and appends the id to *ids
+ typedef std::vector<WordID> Ws;
+ Ws *ids; // destination vector supplied by the caller; only the pointer is stored, so copies of the functor append to the same vector
+ explicit add_wordids(Ws *i) : ids(i) { }
+ void operator()(char const* s) { // s is handed to TD::Convert(char const*)
+ ids->push_back(TD::Convert(s));
+ }
+};
+
}
+void TD::ConvertSentence(std::string const& s, std::vector<WordID>* ids) { // clears *ids, then lets VisitTokens feed add_wordids; ids is dereferenced unconditionally
+ ids->clear();
+ VisitTokens(s,add_wordids(ids)); // VisitTokens is not defined in this diff (presumably declared in stringlib.h); assumed to invoke the functor once per token of s -- TODO confirm
+}