summaryrefslogtreecommitdiff
path: root/decoder/tdict.cc
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-21 20:52:35 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-21 20:52:35 +0000
commitc946ad175601eda5a8cb3e6cd0e7c973d3656012 (patch)
tree2766abaeb876e0cb6a9bad4308a11349a072c084 /decoder/tdict.cc
parentcb094b00983dabc0393d1fab40b3450266c7c8a9 (diff)
tdict TD:: ss se unk and reserved(i)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@362 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/tdict.cc')
-rw-r--r--decoder/tdict.cc59
1 files changed, 55 insertions, 4 deletions
diff --git a/decoder/tdict.cc b/decoder/tdict.cc
index 43bc4cbd..04b82c51 100644
--- a/decoder/tdict.cc
+++ b/decoder/tdict.cc
@@ -8,11 +8,51 @@
using namespace std;
//FIXME: valgrind errors (static init order?)
-Vocab TD::dict_;
+Vocab TD::dict_(0,TD::max_wordid); // the single shared dictionary behind every TD:: call in this file
+WordID TD::ss=dict_.ssIndex(); // cached from dict_; defined after dict_ in this file, so dict_ is built first
+WordID TD::se=dict_.seIndex(); // cached result of dict_.seIndex()
+WordID TD::unk=dict_.unkIndex(); // cached result of dict_.unkIndex()
+char const*const TD::ss_str=Vocab_SentStart; // TD_init asserts Convert(ss_str)==ss
+char const*const TD::se_str=Vocab_SentEnd; // TD_init asserts Convert(se_str)==se
+char const*const TD::unk_str=Vocab_Unknown; // TD_init asserts Convert(unk_str)==unk
+
+// Interns the word pre+(i-base)+">" for each i in [base,e) and asserts it receives id i (base must be the next free id).
+inline void pad(std::string const& pre,int base,int e) {
+ assert(base<=e);
+ ostringstream o;
+ for (int i=base;i<e;++i) {
+ o.str(std::string()); // reset the buffer; o.str(pre) would leave the put position at 0 so << overwrote pre
+ o<<pre<<(i-base)<<'>';
+ WordID id=TD::Convert(o.str());
+ assert(id==i);
+ }
+}
+
+
+namespace {
+struct TD_init { // its constructor runs when the file-local td_init object below is initialized
+ TD_init() {
+ assert(TD::Convert(TD::ss_str)==TD::ss); // sanity checks only: the Convert calls vanish when NDEBUG is defined
+ assert(TD::Convert(TD::se_str)==TD::se);
+ assert(TD::Convert(TD::unk_str)==TD::unk);
+ assert(TD::none==Vocab_None);
+ pad("<FILLER",TD::end(),TD::reserved_begin); // add filler words for ids from TD::end() up to TD::reserved_begin
+ assert(TD::end()==TD::reserved_begin);
+ int reserved_end=TD::begin();
+ pad("<RESERVED",TD::end(),reserved_end); // then placeholder words for the ids below TD::begin()
+ assert(TD::end()==reserved_end);
+ }
+};
+
+TD_init td_init;
+}
unsigned int TD::NumWords() { // forwards to dict_.numWords()
return dict_.numWords();
}
+WordID TD::end() { // forwards to dict_.highIndex(); TD_init's asserts treat it as the id the next new word receives
+ return dict_.highIndex();
+}
WordID TD::Convert(const std::string& s) {
return dict_.addWord((VocabString)s.c_str());
@@ -26,9 +66,6 @@ const char* TD::Convert(const WordID& w) {
return dict_.getWord((VocabIndex)w);
}
-static const string empty;
-static const string space = " ";
-
void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids) {
ids->clear();
@@ -45,6 +82,20 @@ std::string TD::GetString(const std::vector<WordID>& str) {
return o.str();
}
+int TD::AppendString(const WordID& w, int pos, int bufsize, char* buffer) // copies w's string into buffer starting at offset pos
+{
+ const char* word = TD::Convert(w); // NOTE(review): assumes Convert never yields a null pointer for w -- confirm
+ const char* const end_buf = buffer + bufsize;
+ char* dest = buffer + pos;
+ while(dest < end_buf && *word) { // stops at the end of the buffer (truncating the word) or at the end of the word
+ *dest = *word;
+ ++dest;
+ ++word;
+ }
+ return (dest - buffer); // offset just past the last character written; no terminating NUL is stored
+}
+
+
namespace {
struct add_wordids {
typedef std::vector<WordID> Ws;