diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-20 23:03:54 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-20 23:03:54 +0000 |
commit | 42c1346c6bce064601beb81bb954ea5e30e9f43d (patch) | |
tree | 7656e4c6f38cf17775edb707252518effdacb562 /decoder | |
parent | 0720de0bee526e8e9b311bb91d0a3a1efa8c1438 (diff) |
tokenization works. oops.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@346 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/stringlib.h | 5 | ||||
-rw-r--r-- | decoder/tdict.cc | 4 |
2 files changed, 9 insertions, 0 deletions
```diff
diff --git a/decoder/stringlib.h b/decoder/stringlib.h
index a21ffd59..a7c6c3c4 100644
--- a/decoder/stringlib.h
+++ b/decoder/stringlib.h
@@ -137,6 +137,11 @@ void VisitTokens(char *p,char *const end,F f) {
 
 template <class F>
 void VisitTokens(std::string const& s,F f) {
+  std::vector<std::string> ss=SplitOnWhitespace(s);
+  for (int i=0;i<ss.size();++i)
+    f(ss[i]);
+  return;
+  //FIXME:
   if (s.empty()) return;
   mutable_c_str mp(s);
   VisitTokens(mp.p,mp.p+s.size(),f);
diff --git a/decoder/tdict.cc b/decoder/tdict.cc
index d7fc7eb7..43bc4cbd 100644
--- a/decoder/tdict.cc
+++ b/decoder/tdict.cc
@@ -50,9 +50,13 @@ struct add_wordids {
   typedef std::vector<WordID> Ws;
   Ws *ids;
   explicit add_wordids(Ws *i) : ids(i) { }
+  add_wordids(const add_wordids& o) : ids(o.ids) { }
   void operator()(char const* s) {
     ids->push_back(TD::Convert(s));
   }
+  void operator()(std::string const& s) {
+    ids->push_back(TD::Convert(s));
+  }
 };
 
 }
```