summary refs log tree commit diff
path: root/utils/tdict.h
diff options
context:
space:
mode:
Diffstat (limited to 'utils/tdict.h')
-rw-r--r-- utils/tdict.h 23
1 files changed, 3 insertions, 20 deletions
diff --git a/utils/tdict.h b/utils/tdict.h
index dd7f0237..393146fa 100644
--- a/utils/tdict.h
+++ b/utils/tdict.h
@@ -6,29 +6,10 @@
#include "wordid.h"
#include <assert.h>
-class Vocab;
+class Dict;
struct TD {
- /* // disabled for now
- static const int reserved_begin=10; // allow room for SRI special tokens e.g. unk ss se pause. tokens until this get "<FILLERi>"
- static const int n_reserved=10; // 0...n_reserved-1 get token '<RESERVEDi>'
- static inline WordID reserved(int i) {
- assert(i>=0 && i<n_reserved);
- return (WordID)(reserved_begin+i);
- }
- static inline WordID begin() {
- return reserved(n_reserved);
- }
- */
- static const WordID max_wordid=0x7fffffff;
- static const WordID null=max_wordid-1;
- static const WordID none=(WordID)-1; // Vocab_None - this will collide with mixed node/variable id / word space, though. max_wordid will be distinct (still positive)
- static char const* const ss_str; //="<s>";
- static char const* const se_str; //="</s>";
- static char const* const unk_str; //="<unk>";
- static WordID ss,se,unk; // x=Convert(x_str)
static WordID end(); // next id to be assigned; [begin,end) give the non-reserved tokens seen so far
- static Vocab dict_;
static void ConvertSentence(std::string const& sent, std::vector<WordID>* ids);
static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);
static std::string GetString(const std::vector<WordID>& str);
@@ -38,6 +19,8 @@ struct TD {
static WordID Convert(const std::string& s);
static WordID Convert(char const* s);
static const char* Convert(WordID w);
+ private:
+ static Dict dict_;
};
struct ToTD {