diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-06-23 15:54:21 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-06-23 15:54:21 -0400 |
commit | 648575a8604243259c110a363f02fbb64d44bcf9 (patch) | |
tree | 7d0576238ad29404cd49d446166ca31ebc01eb05 | |
parent | 0695d80013ef4994142d8a069424ea57e45a9d80 (diff) |
clean up tagger a bit
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | decoder/ff_tagger.cc | 17 | ||||
-rw-r--r-- | decoder/tagger.cc | 1 |
3 files changed, 16 insertions, 4 deletions
diff --git a/configure.ac b/configure.ac index 19498794..e22eddbd 100644 --- a/configure.ac +++ b/configure.ac @@ -130,6 +130,6 @@ then AM_CONDITIONAL([GLC], true) fi -CPPFLAGS="-fPIC $CPPFLAGS -DHAVE_CONFIG_H" +CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H" AC_OUTPUT(Makefile rst_parser/Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile training/liblbfgs/Makefile dpmert/Makefile pro-train/Makefile rampion/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile dtrain/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile) diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc index 019315a2..fd9210fa 100644 --- a/decoder/ff_tagger.cc +++ b/decoder/ff_tagger.cc @@ -8,6 +8,17 @@ using namespace std; +namespace { + string Escape(const string& x) { + string y = x; + for (int i = 0; i < y.size(); ++i) { + if (y[i] == '=') y[i]='_'; + if (y[i] == ';') y[i]='_'; + } + return y; + } +} + Tagger_BigramIndicator::Tagger_BigramIndicator(const std::string& param) : FeatureFunction(sizeof(WordID)) { no_uni_ = (LowercaseString(param) == "no_uni"); @@ -28,7 +39,7 @@ void Tagger_BigramIndicator::FireFeature(const WordID& left, os << '_'; if (right < 0) { os << "EOS"; } else { os << TD::Convert(right); } } - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } @@ -90,7 +101,7 @@ void LexicalPairIndicator::FireFeature(WordID src, if (!fid) { ostringstream os; os << name_ << ':' << TD::Convert(src) << ':' << TD::Convert(trg); - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } @@ -127,7 +138,7 @@ void OutputIndicator::FireFeature(WordID trg, if (escape.count(trg)) trg = escape[trg]; ostringstream os; os << "T:" << TD::Convert(trg); - fid = FD::Convert(os.str()); + fid = FD::Convert(Escape(os.str())); } features->set_value(fid, 1.0); } diff --git a/decoder/tagger.cc b/decoder/tagger.cc index 54890e85..63e855c8 100644 --- a/decoder/tagger.cc +++ b/decoder/tagger.cc @@ -54,6 +54,7 @@ struct TaggerImpl { const int new_node_id = forest->AddNode(kXCAT)->id_; for (int k = 0; k < tagset_.size(); ++k) { TRulePtr rule(TRule::CreateLexicalRule(src, tagset_[k])); + rule->lhs_ = kXCAT; Hypergraph::Edge* edge = forest->AddEdge(rule, Hypergraph::TailNodeVector()); edge->i_ = i; edge->j_ = i+1; |