diff options
Diffstat (limited to 'utils/tdict.h')
-rw-r--r-- | utils/tdict.h | 23 |
1 files changed, 3 insertions, 20 deletions
diff --git a/utils/tdict.h b/utils/tdict.h
index dd7f0237..393146fa 100644
--- a/utils/tdict.h
+++ b/utils/tdict.h
@@ -6,29 +6,10 @@
 #include "wordid.h"
 #include <assert.h>
 
-class Vocab;
+class Dict;
 
 struct TD {
-  /* // disabled for now
-  static const int reserved_begin=10; // allow room for SRI special tokens e.g. unk ss se pause. tokens until this get "<FILLERi>"
-  static const int n_reserved=10; // 0...n_reserved-1 get token '<RESERVEDi>'
-  static inline WordID reserved(int i) {
-    assert(i>=0 && i<n_reserved);
-    return (WordID)(reserved_begin+i);
-  }
-  static inline WordID begin() {
-    return reserved(n_reserved);
-  }
-  */
-  static const WordID max_wordid=0x7fffffff;
-  static const WordID null=max_wordid-1;
-  static const WordID none=(WordID)-1; // Vocab_None - this will collide with mixed node/variable id / word space, though. max_wordid will be distinct (still positive)
-  static char const* const ss_str; //="<s>";
-  static char const* const se_str; //="</s>";
-  static char const* const unk_str; //="<unk>";
-  static WordID ss,se,unk; // x=Convert(x_str)
   static WordID end(); // next id to be assigned; [begin,end) give the non-reserved tokens seen so far
-  static Vocab dict_;
   static void ConvertSentence(std::string const& sent, std::vector<WordID>* ids);
   static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);
   static std::string GetString(const std::vector<WordID>& str);
@@ -38,6 +19,8 @@ struct TD {
   static WordID Convert(const std::string& s);
   static WordID Convert(char const* s);
   static const char* Convert(WordID w);
+ private:
+  static Dict dict_;
 };
 
 struct ToTD {