summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-06-23 15:54:21 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-06-23 15:54:21 -0400
commit b9266b068a37dc46f8de813c59ffbef2e4b89280 (patch)
tree ab5f976b47edc2cc6e21bd60426bd8d0fe1f38b7
parent 0b27ea3f91d0ad2f2ed718839d308db3d1baf5ae (diff)
clean up tagger a bit
-rw-r--r-- configure.ac 2
-rw-r--r-- decoder/ff_tagger.cc 17
-rw-r--r-- decoder/tagger.cc 1
3 files changed, 16 insertions, 4 deletions
diff --git a/configure.ac b/configure.ac
index 19498794..e22eddbd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -130,6 +130,6 @@ then
AM_CONDITIONAL([GLC], true)
fi
-CPPFLAGS="-fPIC $CPPFLAGS -DHAVE_CONFIG_H"
+CPPFLAGS="-DPIC -fPIC $CPPFLAGS -DHAVE_CONFIG_H"
AC_OUTPUT(Makefile rst_parser/Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile training/liblbfgs/Makefile dpmert/Makefile pro-train/Makefile rampion/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile dtrain/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile)
diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc
index 019315a2..fd9210fa 100644
--- a/decoder/ff_tagger.cc
+++ b/decoder/ff_tagger.cc
@@ -8,6 +8,17 @@
using namespace std;
+namespace {  // unnamed namespace: Escape is visible only within this translation unit
+ string Escape(const string& x) {  // returns a copy of x in which every '=' and ';' is replaced by '_'
+ string y = x;  // work on a copy so the caller's string is left unmodified
+ for (int i = 0; i < y.size(); ++i) {  // NOTE(review): signed int index compared against unsigned size()
+ if (y[i] == '=') y[i]='_';  // NOTE(review): presumably '=' and ';' are reserved in feature-name serialization -- confirm
+ if (y[i] == ';') y[i]='_';
+ }
+ return y;  // the result is passed to FD::Convert by the FireFeature hunks in this patch
+ }
+}  // end unnamed namespace
+
Tagger_BigramIndicator::Tagger_BigramIndicator(const std::string& param) :
FeatureFunction(sizeof(WordID)) {
no_uni_ = (LowercaseString(param) == "no_uni");
@@ -28,7 +39,7 @@ void Tagger_BigramIndicator::FireFeature(const WordID& left,
os << '_';
if (right < 0) { os << "EOS"; } else { os << TD::Convert(right); }
}
- fid = FD::Convert(os.str());
+ fid = FD::Convert(Escape(os.str()));
}
features->set_value(fid, 1.0);
}
@@ -90,7 +101,7 @@ void LexicalPairIndicator::FireFeature(WordID src,
if (!fid) {
ostringstream os;
os << name_ << ':' << TD::Convert(src) << ':' << TD::Convert(trg);
- fid = FD::Convert(os.str());
+ fid = FD::Convert(Escape(os.str()));
}
features->set_value(fid, 1.0);
}
@@ -127,7 +138,7 @@ void OutputIndicator::FireFeature(WordID trg,
if (escape.count(trg)) trg = escape[trg];
ostringstream os;
os << "T:" << TD::Convert(trg);
- fid = FD::Convert(os.str());
+ fid = FD::Convert(Escape(os.str()));
}
features->set_value(fid, 1.0);
}
diff --git a/decoder/tagger.cc b/decoder/tagger.cc
index 54890e85..63e855c8 100644
--- a/decoder/tagger.cc
+++ b/decoder/tagger.cc
@@ -54,6 +54,7 @@ struct TaggerImpl {
const int new_node_id = forest->AddNode(kXCAT)->id_;
for (int k = 0; k < tagset_.size(); ++k) {
TRulePtr rule(TRule::CreateLexicalRule(src, tagset_[k]));
+ rule->lhs_ = kXCAT;
Hypergraph::Edge* edge = forest->AddEdge(rule, Hypergraph::TailNodeVector());
edge->i_ = i;
edge->j_ = i+1;