From f6cd72d39cdc7a034b36ad73d613f6f268826e48 Mon Sep 17 00:00:00 2001 From: graehl Date: Tue, 20 Jul 2010 23:57:46 +0000 Subject: fabulous string tokenization tested git-svn-id: https://ws10smt.googlecode.com/svn/trunk@350 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/sentences.h | 5 ++++- decoder/stringlib.h | 30 ++++++++++++++++++++++++++---- decoder/stringlib_test.cc | 17 +++++++++++++++++ decoder/value_array.h | 2 ++ 4 files changed, 49 insertions(+), 5 deletions(-) create mode 100755 decoder/stringlib_test.cc diff --git a/decoder/sentences.h b/decoder/sentences.h index 622a6f43..6ab216bf 100755 --- a/decoder/sentences.h +++ b/decoder/sentences.h @@ -15,9 +15,12 @@ inline std::ostream & operator<<(std::ostream &out,Sentence const& s) { inline void StringToSentence(std::string const& str,Sentence &s) { using namespace std; - vector ss=SplitOnWhitespace(str); s.clear(); + TD::ConvertSentence(str,&s); +/* vector ss=SplitOnWhitespace(str); transform(ss.begin(),ss.end(),back_inserter(s),ToTD()); +*/ + } inline Sentence StringToSentence(std::string const& str) { diff --git a/decoder/stringlib.h b/decoder/stringlib.h index a7c6c3c4..a0e03624 100644 --- a/decoder/stringlib.h +++ b/decoder/stringlib.h @@ -1,6 +1,13 @@ #ifndef CDEC_STRINGLIB_H_ #define CDEC_STRINGLIB_H_ +#ifdef STRINGLIB_DEBUG +#include +#define SLIBDBG(x) do { std::cerr<<"DBG(stringlib): "< #include #include @@ -112,20 +119,26 @@ inline bool IsWordSep(char c) { template // *end must be 0 (i.e. [p,end] is valid storage, which will be written to with 0 to separate c string tokens void VisitTokens(char *p,char *const end,F f) { + SLIBDBG("VisitTokens. p="< + void operator()(S const& s) const { + cout< > class ValueArray : A // private inheritance so stateless allocator adds no size. { public: + const int SV_MAX=sizeof(T)/sizeof(T*)>1?sizeof(T)/sizeof(T*):1; + //space optimization: SV_MAX T will fit inside what would otherwise be a pointer to heap data. todo in the far future if bored. typedef T value_type; typedef T& reference; typedef T const& const_reference; -- cgit v1.2.3