From 851e389dffdd6996ea32d70defb8906de80b9edc Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 14 Dec 2009 20:35:11 -0500 Subject: few small fixes of alignment tools, add new orthographic similarity feature for word aligner, final naming of directories, libraries in cdec --- decoder/tdict.cc | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 decoder/tdict.cc (limited to 'decoder/tdict.cc') diff --git a/decoder/tdict.cc b/decoder/tdict.cc new file mode 100644 index 00000000..c00d20b8 --- /dev/null +++ b/decoder/tdict.cc @@ -0,0 +1,49 @@ +#include "Ngram.h" +#include "dict.h" +#include "tdict.h" +#include "Vocab.h" + +using namespace std; + +Vocab* TD::dict_ = new Vocab; + +static const string empty; +static const string space = " "; + +WordID TD::Convert(const std::string& s) { + return dict_->addWord((VocabString)s.c_str()); +} + +const char* TD::Convert(const WordID& w) { + return dict_->getWord((VocabIndex)w); +} + +void TD::GetWordIDs(const std::vector& strings, std::vector* ids) { + ids->clear(); + for (vector::const_iterator i = strings.begin(); i != strings.end(); ++i) + ids->push_back(TD::Convert(*i)); +} + +std::string TD::GetString(const std::vector& str) { + string res; + for (vector::const_iterator i = str.begin(); i != str.end(); ++i) + res += (i == str.begin() ? empty : space) + TD::Convert(*i); + return res; +} + +void TD::ConvertSentence(const std::string& sent, std::vector* ids) { + string s = sent; + int last = 0; + ids->clear(); + for (int i=0; i < s.size(); ++i) + if (s[i] == 32 || s[i] == '\t') { + s[i]=0; + if (last != i) { + ids->push_back(Convert(&s[last])); + } + last = i + 1; + } + if (last != s.size()) + ids->push_back(Convert(&s[last])); +} + -- cgit v1.2.3