diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-06-23 15:54:21 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-06-23 15:54:21 -0400 |
commit | b9266b068a37dc46f8de813c59ffbef2e4b89280 (patch) | |
tree | ab5f976b47edc2cc6e21bd60426bd8d0fe1f38b7 /decoder/ff_tagger.cc | |
parent | 0b27ea3f91d0ad2f2ed718839d308db3d1baf5ae (diff) |
clean up tagger a bit
Diffstat (limited to 'decoder/ff_tagger.cc')
-rw-r--r-- | decoder/ff_tagger.cc | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc index 019315a2..fd9210fa 100644 --- a/decoder/ff_tagger.cc +++ b/decoder/ff_tagger.cc @@ -8,6 +8,17 @@ using namespace std; +namespace { + string Escape(const string& x) { + string y = x; + for (int i = 0; i < y.size(); ++i) { + if (y[i] == '=') y[i]='_'; + if (y[i] == ';') y[i]='_'; + } + return y; + } +} + Tagger_BigramIndicator::Tagger_BigramIndicator(const std::string& param) : FeatureFunction(sizeof(WordID)) { no_uni_ = (LowercaseString(param) == "no_uni"); @@ -28,7 +39,7 @@ void Tagger_BigramIndicator::FireFeature(const WordID& left, os << '_'; if (right < 0) { os << "EOS"; } else { os << TD::Convert(right); } } - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } @@ -90,7 +101,7 @@ void LexicalPairIndicator::FireFeature(WordID src, if (!fid) { ostringstream os; os << name_ << ':' << TD::Convert(src) << ':' << TD::Convert(trg); - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } @@ -127,7 +138,7 @@ void OutputIndicator::FireFeature(WordID trg, if (escape.count(trg)) trg = escape[trg]; ostringstream os; os << "T:" << TD::Convert(trg); - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } |