diff options
| -rwxr-xr-x | decoder/sentences.h | 5 | ||||
| -rw-r--r-- | decoder/stringlib.h | 30 | ||||
| -rwxr-xr-x | decoder/stringlib_test.cc | 17 | ||||
| -rwxr-xr-x | decoder/value_array.h | 2 | 
4 files changed, 49 insertions, 5 deletions
diff --git a/decoder/sentences.h b/decoder/sentences.h index 622a6f43..6ab216bf 100755 --- a/decoder/sentences.h +++ b/decoder/sentences.h @@ -15,9 +15,12 @@ inline std::ostream & operator<<(std::ostream &out,Sentence const& s) {  inline void StringToSentence(std::string const& str,Sentence &s) {    using namespace std; -  vector<string> ss=SplitOnWhitespace(str);    s.clear(); +  TD::ConvertSentence(str,&s); +/*  vector<string> ss=SplitOnWhitespace(str);    transform(ss.begin(),ss.end(),back_inserter(s),ToTD()); +*/ +  }  inline Sentence StringToSentence(std::string const& str) { diff --git a/decoder/stringlib.h b/decoder/stringlib.h index a7c6c3c4..a0e03624 100644 --- a/decoder/stringlib.h +++ b/decoder/stringlib.h @@ -1,6 +1,13 @@  #ifndef CDEC_STRINGLIB_H_  #define CDEC_STRINGLIB_H_ +#ifdef STRINGLIB_DEBUG +#include <iostream> +#define SLIBDBG(x) do { std::cerr<<"DBG(stringlib): "<<x<<std::endl; } while(0) +#else +#define SLIBDBG(x) +#endif +  #include <map>  #include <vector>  #include <cctype> @@ -112,20 +119,26 @@ inline bool IsWordSep(char c) {  template <class F>  // *end must be 0 (i.e. [p,end] is valid storage, which will be written to with 0 to separate c string tokens  void VisitTokens(char *p,char *const end,F f) { +  SLIBDBG("VisitTokens. p="<<p<<" Nleft="<<end-p);    if (p==end) return;    char *last; // 0 terminated already.  this is ok to mutilate because s is a copy of the string passed in.  well, barring copy on write i guess.    while(IsWordSep(*p)) { ++p;if (p==end) return; } // skip init whitespace    last=p; // first non-ws char    for(;;) { -    ++p; -    // now last is a non-ws char, and p is one past it. +    SLIBDBG("Start of word. last="<<last<<" *p="<<*p<<" Nleft="<<end-p); +    // last==p, pointing at first non-ws char not yet translated into f(word) call      for(;;) {// p to end of word -      if (p==end) { f(last); return; } -      if (!IsWordSep(*p)) break;        ++p; +      if (p==end) { +        f(last); +        SLIBDBG("Returning. word="<<last<<" *p="<<*p<<" Nleft="<<end-p); +        return; +      } +      if (IsWordSep(*p)) break;      }      *p=0;      f(last); +    SLIBDBG("End of word. word="<<last<<" rest="<<p+1<<" Nleft="<<end-p);      for(;;) { // again skip extra whitespace        ++p;        if (p==end) return; @@ -136,14 +149,23 @@ void VisitTokens(char *p,char *const end,F f) {  }  template <class F> +void VisitTokens(char *p,F f) { +  VisitTokens(p,p+std::strlen(p),f); +} + + +template <class F>  void VisitTokens(std::string const& s,F f) { +  if (0) {    std::vector<std::string> ss=SplitOnWhitespace(s);    for (int i=0;i<ss.size();++i)      f(ss[i]);    return; +  }    //FIXME:    if (s.empty()) return;    mutable_c_str mp(s); +  SLIBDBG("mp="<<mp.p);    VisitTokens(mp.p,mp.p+s.size(),f);  } diff --git a/decoder/stringlib_test.cc b/decoder/stringlib_test.cc new file mode 100755 index 00000000..ea39f30f --- /dev/null +++ b/decoder/stringlib_test.cc @@ -0,0 +1,17 @@ +#define STRINGLIB_DEBUG +#include "stringlib.h" + +using namespace std; +struct print { +  template <class S> +  void operator()(S const& s) const { +    cout<<s<<endl; +  } +}; + +int main(int argc, char *argv[]) { +  char *p=" 1 are u 2 serious?"; +  std::string const& w="verylongword"; +  VisitTokens(p,print()); +  VisitTokens(w,print()); +} diff --git a/decoder/value_array.h b/decoder/value_array.h index 7401938a..042247a1 100755 --- a/decoder/value_array.h +++ b/decoder/value_array.h @@ -17,6 +17,8 @@ template <class T, class A = std::allocator<T> >  class ValueArray : A // private inheritance so stateless allocator adds no size.  {  public: +  const int SV_MAX=sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1; +  //space optimization: SV_MAX T will fit inside what would otherwise be a pointer to heap data.  todo in the far future if bored.    typedef T value_type;    typedef T& reference;    typedef T const& const_reference;  | 
