From 80686d4e567bae579ea39e009826a2de92cd4ace Mon Sep 17 00:00:00 2001 From: redpony Date: Wed, 11 Aug 2010 02:37:10 +0000 Subject: major refactor, break bad circular deps git-svn-id: https://ws10smt.googlecode.com/svn/trunk@509 ec762483-ff6d-05da-a07a-a48fb63a330f --- utils/tdict.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 utils/tdict.h (limited to 'utils/tdict.h') diff --git a/utils/tdict.h b/utils/tdict.h new file mode 100644 index 00000000..a7b3ee1c --- /dev/null +++ b/utils/tdict.h @@ -0,0 +1,50 @@ +#ifndef _TDICT_H_ +#define _TDICT_H_ + +#include +#include +#include "wordid.h" +#include + +class Vocab; + +struct TD { + /* // disabled for now + static const int reserved_begin=10; // allow room for SRI special tokens e.g. unk ss se pause. tokens until this get "" + static const int n_reserved=10; // 0...n_reserved-1 get token '' + static inline WordID reserved(int i) { + assert(i>=0 && i"; + static char const* const se_str; //=""; + static char const* const unk_str; //=""; + static WordID ss,se,unk; // x=Convert(x_str) + static WordID end(); // next id to be assigned; [begin,end) give the non-reserved tokens seen so far + static Vocab dict_; + static void ConvertSentence(std::string const& sent, std::vector* ids); + static void GetWordIDs(const std::vector& strings, std::vector* ids); + static std::string GetString(const std::vector& str); + static std::string GetString(WordID const* i,WordID const* e); + static int AppendString(const WordID& w, int pos, int bufsize, char* buffer); + static unsigned int NumWords(); + static WordID Convert(const std::string& s); + static WordID Convert(char const* s); + static const char* Convert(WordID w); +}; + +struct ToTD { + typedef WordID result_type; + result_type operator()(std::string const& t) const { + return TD::Convert(t); + } +}; + + +#endif -- cgit v1.2.3