From 4f9933d668d247ea5831c3f2af0b996a94da28f7 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 10 Mar 2011 01:58:30 -0500 Subject: remove dependency on SRILM --- utils/tdict.cc | 74 ++++------------------------------------------------------ 1 file changed, 4 insertions(+), 70 deletions(-) (limited to 'utils/tdict.cc') diff --git a/utils/tdict.cc b/utils/tdict.cc index 1f68feae..23a298f8 100644 --- a/utils/tdict.cc +++ b/utils/tdict.cc @@ -5,93 +5,27 @@ #include #include #include -#include "Ngram.h" #include "dict.h" #include "tdict.h" -#include "Vocab.h" #include "stringlib.h" #include "threadlocal.h" using namespace std; -Vocab TD::dict_(0,TD::max_wordid); -WordID TD::ss=dict_.ssIndex(); -WordID TD::se=dict_.seIndex(); -WordID TD::unk=dict_.unkIndex(); -char const*const TD::ss_str=Vocab_SentStart; -char const*const TD::se_str=Vocab_SentEnd; -char const*const TD::unk_str=Vocab_Unknown; - -// pre+(i-base)+">" for i in [base,e) -inline void pad(std::string const& pre,int base,int e) { - assert(base<=e); - ostringstream o; - for (int i=base;i'; - WordID id=TD::Convert(o.str()); - assert(id==i); // this fails. why? - } -} - - -namespace { -struct TD_init { - TD_init() { - /* - // disabled for now since it's breaking trunk - assert(TD::Convert(TD::ss_str)==TD::ss); - assert(TD::Convert(TD::se_str)==TD::se); - assert(TD::Convert(TD::unk_str)==TD::unk); - assert(TD::none==Vocab_None); - pad("=dict_.highIndex()) return undef_token(w); -#endif - return dict_.getWord((VocabIndex)w); + return dict_.Convert(w).c_str(); } - void TD::GetWordIDs(const std::vector& strings, std::vector* ids) { ids->clear(); for (vector::const_iterator i = strings.begin(); i != strings.end(); ++i) -- cgit v1.2.3