summary | refs | log | tree | commit | diff
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rw-r--r-- utils/stringlib.h 15
-rw-r--r-- utils/tdict.cc 4
-rw-r--r-- utils/tdict.h 2
3 files changed, 18 insertions, 3 deletions
diff --git a/utils/stringlib.h b/utils/stringlib.h
index f60b7867..2fdbfff8 100644
--- a/utils/stringlib.h
+++ b/utils/stringlib.h
@@ -242,6 +242,21 @@ void VisitTokens(std::string const& s,F f) {
VisitTokens(mp.p,mp.p+s.size(),f);
}
+// Visits the tokens of s with f, scanning from byte offset `start` rather than
+// from the beginning of the string.
+// f: token visitor, passed on to the pointer-range VisitTokens overload.
+// start: offset into s of the first character to scan.  An offset at or past
+//   s.size() leaves nothing to scan, so f is never invoked.
+template <class F>
+void VisitTokens(std::string const& s,F f, unsigned start) {
+  // Also covers the empty string.  Without this guard mp.p+start could lie
+  // beyond mp.p+s.size(), handing the range overload begin > end.
+  if (start>=s.size()) return;
+  mutable_c_str mp(s);
+  SLIBDBG("mp="<<mp.p);
+  VisitTokens(mp.p+start,mp.p+s.size(),f);
+}
+
inline void SplitCommandAndParam(const std::string& in, std::string* cmd, std::string* param) {
cmd->clear();
param->clear();
diff --git a/utils/tdict.cc b/utils/tdict.cc
index fd2b76cb..c99f1697 100644
--- a/utils/tdict.cc
+++ b/utils/tdict.cc
@@ -70,7 +70,7 @@ struct add_wordids {
}
-void TD::ConvertSentence(std::string const& s, std::vector<WordID>* ids) {
+void TD::ConvertSentence(std::string const& s, std::vector<WordID>* ids, unsigned start) { // start: offset into s handed straight to VisitTokens
ids->clear();
- VisitTokens(s,add_wordids(ids));
+ VisitTokens(s,add_wordids(ids),start); // add_wordids(ids) is passed as the token visitor f
}
diff --git a/utils/tdict.h b/utils/tdict.h
index 03afc2e6..bb19ecd5 100644
--- a/utils/tdict.h
+++ b/utils/tdict.h
@@ -9,7 +9,7 @@
struct TD {
static WordID end(); // next id to be assigned; [begin,end) give the non-reserved tokens seen so far
- static void ConvertSentence(std::string const& sent, std::vector<WordID>* ids);
+ static void ConvertSentence(std::string const& sent, std::vector<WordID>* ids, unsigned start=0);
static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);
static std::string GetString(const std::vector<WordID>& str);
static std::string GetString(WordID const* i,WordID const* e);