summaryrefslogtreecommitdiff
path: root/rst_parser/arc_ff.cc
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-04-15 17:28:08 -0400
committerChris Dyer <cdyer@cs.cmu.edu>2012-04-15 17:28:08 -0400
commit8d51973c21337a1633e559cd09a649265600cc4c (patch)
tree43b212e2a577d49ddee43b32aa3ef391de4cc1a6 /rst_parser/arc_ff.cc
parentc22e9248a1fa24b0255a55d21afb94a9ed3ddc22 (diff)
crf training of arc-factored dep parser
Diffstat (limited to 'rst_parser/arc_ff.cc')
-rw-r--r--rst_parser/arc_ff.cc64
1 files changed, 64 insertions, 0 deletions
diff --git a/rst_parser/arc_ff.cc b/rst_parser/arc_ff.cc
new file mode 100644
index 00000000..f9effbda
--- /dev/null
+++ b/rst_parser/arc_ff.cc
@@ -0,0 +1,64 @@
+#include "arc_ff.h"
+
+#include "tdict.h"
+#include "fdict.h"
+#include "sentence_metadata.h"
+
+using namespace std;
+
+ArcFeatureFunction::~ArcFeatureFunction() {}
+
+void ArcFeatureFunction::PrepareForInput(const TaggedSentence&) {}
+
+DistancePenalty::DistancePenalty(const string&) : fidw_(FD::Convert("Distance")), fidr_(FD::Convert("RootDistance")) {}
+
+void DistancePenalty::EdgeFeaturesImpl(const TaggedSentence& sent,
+ short h,
+ short m,
+ SparseVector<weight_t>* features) const {
+ const bool dir = m < h;
+ const bool is_root = (h == -1);
+ int v = m - h;
+ if (v < 0) {
+ v= -1 - int(log(-v) / log(2));
+ } else {
+ v= int(log(v) / log(2));
+ }
+ static map<int, int> lenmap;
+ int& lenfid = lenmap[v];
+ if (!lenfid) {
+ ostringstream os;
+ if (v < 0) os << "LenL" << -v; else os << "LenR" << v;
+ lenfid = FD::Convert(os.str());
+ }
+ features->set_value(lenfid, 1.0);
+ const string& lenstr = FD::Convert(lenfid);
+ if (!is_root) {
+ static int modl = FD::Convert("ModLeft");
+ static int modr = FD::Convert("ModRight");
+ if (dir) features->set_value(modl, 1);
+ else features->set_value(modr, 1);
+ }
+ if (is_root) {
+ ostringstream os;
+ os << "ROOT:" << TD::Convert(sent.pos[m]);
+ features->set_value(FD::Convert(os.str()), 1.0);
+ os << "_" << lenstr;
+ features->set_value(FD::Convert(os.str()), 1.0);
+ } else { // not root
+ ostringstream os;
+ os << "HM:" << TD::Convert(sent.pos[h]) << '_' << TD::Convert(sent.pos[m]);
+ features->set_value(FD::Convert(os.str()), 1.0);
+ os << '_' << dir;
+ features->set_value(FD::Convert(os.str()), 1.0);
+ os << '_' << lenstr;
+ features->set_value(FD::Convert(os.str()), 1.0);
+ ostringstream os2;
+ os2 << "LexHM:" << TD::Convert(sent.words[h]) << '_' << TD::Convert(sent.words[m]);
+ features->set_value(FD::Convert(os2.str()), 1.0);
+ os2 << '_' << dir;
+ features->set_value(FD::Convert(os2.str()), 1.0);
+ os2 << '_' << lenstr;
+ features->set_value(FD::Convert(os2.str()), 1.0);
+ }
+}