From 648575a8604243259c110a363f02fbb64d44bcf9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 23 Jun 2012 15:54:21 -0400 Subject: clean up tagger a bit --- decoder/ff_tagger.cc | 17 ++++++++++++++--- decoder/tagger.cc | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'decoder') diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc index 019315a2..fd9210fa 100644 --- a/decoder/ff_tagger.cc +++ b/decoder/ff_tagger.cc @@ -8,6 +8,17 @@ using namespace std; +namespace { + string Escape(const string& x) { + string y = x; + for (int i = 0; i < y.size(); ++i) { + if (y[i] == '=') y[i]='_'; + if (y[i] == ';') y[i]='_'; + } + return y; + } +} + Tagger_BigramIndicator::Tagger_BigramIndicator(const std::string& param) : FeatureFunction(sizeof(WordID)) { no_uni_ = (LowercaseString(param) == "no_uni"); @@ -28,7 +39,7 @@ void Tagger_BigramIndicator::FireFeature(const WordID& left, os << '_'; if (right < 0) { os << "EOS"; } else { os << TD::Convert(right); } } - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } @@ -90,7 +101,7 @@ void LexicalPairIndicator::FireFeature(WordID src, if (!fid) { ostringstream os; os << name_ << ':' << TD::Convert(src) << ':' << TD::Convert(trg); - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } @@ -127,7 +138,7 @@ void OutputIndicator::FireFeature(WordID trg, if (escape.count(trg)) trg = escape[trg]; ostringstream os; os << "T:" << TD::Convert(trg); - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } diff --git a/decoder/tagger.cc b/decoder/tagger.cc index 54890e85..63e855c8 100644 --- a/decoder/tagger.cc +++ b/decoder/tagger.cc @@ -54,6 +54,7 @@ struct TaggerImpl { const int new_node_id = forest->AddNode(kXCAT)->id_; for (int k = 0; k < tagset_.size(); ++k) { TRulePtr rule(TRule::CreateLexicalRule(src, tagset_[k])); + rule->lhs_ = kXCAT; Hypergraph::Edge* edge = forest->AddEdge(rule, Hypergraph::TailNodeVector()); edge->i_ = i; edge->j_ = i+1; -- cgit v1.2.3