diff options
Diffstat (limited to 'utils')
-rw-r--r-- | utils/stringlib.h | 15 | ||||
-rw-r--r-- | utils/tdict.cc | 4 | ||||
-rw-r--r-- | utils/tdict.h | 2 |
3 files changed, 18 insertions, 3 deletions
diff --git a/utils/stringlib.h b/utils/stringlib.h
index f60b7867..2fdbfff8 100644
--- a/utils/stringlib.h
+++ b/utils/stringlib.h
@@ -242,6 +242,21 @@ void VisitTokens(std::string const& s,F f) {
   VisitTokens(mp.p,mp.p+s.size(),f);
 }
+template <class F>
+void VisitTokens(std::string const& s,F f, unsigned start) {
+  if (0) {
+    std::vector<std::string> ss=SplitOnWhitespace(s);
+    for (unsigned i=0;i<ss.size();++i)
+      f(ss[i]);
+    return;
+  }
+  //FIXME:
+  if (s.empty()) return;
+  mutable_c_str mp(s);
+  SLIBDBG("mp="<<mp.p);
+  VisitTokens(mp.p+start,mp.p+s.size(),f);
+}
+
 inline void SplitCommandAndParam(const std::string& in, std::string* cmd, std::string* param) {
   cmd->clear();
   param->clear();
diff --git a/utils/tdict.cc b/utils/tdict.cc
index fd2b76cb..c99f1697 100644
--- a/utils/tdict.cc
+++ b/utils/tdict.cc
@@ -70,7 +70,7 @@ struct add_wordids {
 }
-void TD::ConvertSentence(std::string const& s, std::vector<WordID>* ids) {
+void TD::ConvertSentence(std::string const& s, std::vector<WordID>* ids, unsigned start) {
   ids->clear();
-  VisitTokens(s,add_wordids(ids));
+  VisitTokens(s,add_wordids(ids),start);
 }
diff --git a/utils/tdict.h b/utils/tdict.h
index 03afc2e6..bb19ecd5 100644
--- a/utils/tdict.h
+++ b/utils/tdict.h
@@ -9,7 +9,7 @@ struct TD {
   static WordID end(); // next id to be assigned; [begin,end) give the non-reserved tokens seen so far
-  static void ConvertSentence(std::string const& sent, std::vector<WordID>* ids);
+  static void ConvertSentence(std::string const& sent, std::vector<WordID>* ids, unsigned start=0);
   static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);
   static std::string GetString(const std::vector<WordID>& str);
   static std::string GetString(WordID const* i,WordID const* e);