From 80686d4e567bae579ea39e009826a2de92cd4ace Mon Sep 17 00:00:00 2001 From: redpony Date: Wed, 11 Aug 2010 02:37:10 +0000 Subject: major refactor, break bad circular deps git-svn-id: https://ws10smt.googlecode.com/svn/trunk@509 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/tdict.cc | 154 ------------------------------------------------------- 1 file changed, 154 deletions(-) delete mode 100644 decoder/tdict.cc (limited to 'decoder/tdict.cc') diff --git a/decoder/tdict.cc b/decoder/tdict.cc deleted file mode 100644 index 1f68feae..00000000 --- a/decoder/tdict.cc +++ /dev/null @@ -1,154 +0,0 @@ -#define TD_ALLOW_UNDEFINED_WORDIDS 0 - -// if 1, word ids that are >= end() will give a numeric token name (single per-thread shared buffer), which of course won't be Convert-able back to the id, because it's not added to the dict. This is a convenience for logging fake token indices. Any tokens actually added to the dict may cause end() to overlap the range of fake ids you were using - that's up to you to prevent. - -#include -#include -#include -#include "Ngram.h" -#include "dict.h" -#include "tdict.h" -#include "Vocab.h" -#include "stringlib.h" -#include "threadlocal.h" - -using namespace std; - -Vocab TD::dict_(0,TD::max_wordid); -WordID TD::ss=dict_.ssIndex(); -WordID TD::se=dict_.seIndex(); -WordID TD::unk=dict_.unkIndex(); -char const*const TD::ss_str=Vocab_SentStart; -char const*const TD::se_str=Vocab_SentEnd; -char const*const TD::unk_str=Vocab_Unknown; - -// pre+(i-base)+">" for i in [base,e) -inline void pad(std::string const& pre,int base,int e) { - assert(base<=e); - ostringstream o; - for (int i=base;i'; - WordID id=TD::Convert(o.str()); - assert(id==i); // this fails. why? - } -} - - -namespace { -struct TD_init { - TD_init() { - /* - // disabled for now since it's breaking trunk - assert(TD::Convert(TD::ss_str)==TD::ss); - assert(TD::Convert(TD::se_str)==TD::se); - assert(TD::Convert(TD::unk_str)==TD::unk); - assert(TD::none==Vocab_None); - pad("=dict_.highIndex()) return undef_token(w); -#endif - return dict_.getWord((VocabIndex)w); -} - - -void TD::GetWordIDs(const std::vector& strings, std::vector* ids) { - ids->clear(); - for (vector::const_iterator i = strings.begin(); i != strings.end(); ++i) - ids->push_back(TD::Convert(*i)); -} - -std::string TD::GetString(const std::vector& str) { - ostringstream o; - for (int i=0;i Ws; - Ws *ids; - explicit add_wordids(Ws *i) : ids(i) { } - add_wordids(const add_wordids& o) : ids(o.ids) { } - void operator()(char const* s) { - ids->push_back(TD::Convert(s)); - } - void operator()(std::string const& s) { - ids->push_back(TD::Convert(s)); - } -}; - -} - -void TD::ConvertSentence(std::string const& s, std::vector* ids) { - ids->clear(); - VisitTokens(s,add_wordids(ids)); -} -- cgit v1.2.3