From ea830224a256aa85702f74d3a0073839d30b6747 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 17 Apr 2012 23:12:18 -0400
Subject: most features

---
 rst_parser/arc_ff.cc | 110 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 37 deletions(-)

(limited to 'rst_parser')

diff --git a/rst_parser/arc_ff.cc b/rst_parser/arc_ff.cc
index 10885716..fba36e24 100644
--- a/rst_parser/arc_ff.cc
+++ b/rst_parser/arc_ff.cc
@@ -7,11 +7,48 @@
 using namespace std;
 
 struct ArcFFImpl {
-  ArcFFImpl() : kROOT("ROOT") {}
+  ArcFFImpl() : kROOT("ROOT"), kLEFT_POS("LEFT"), kRIGHT_POS("RIGHT") {}
   const string kROOT;
+  const string kLEFT_POS;
+  const string kRIGHT_POS;
 
-  void PrepareForInput(const TaggedSentence& sentence) {
-    (void) sentence;
+  void PrepareForInput(const TaggedSentence& sent) {
+    (void) sent;
+  }
+
+  template <typename A>
+  static void Fire(SparseVector<weight_t>* v, const A& a) {
+    ostringstream os;
+    os << a;
+    v->set_value(FD::Convert(os.str()), 1);
+  }
+
+  template <typename A, typename B>
+  static void Fire(SparseVector<weight_t>* v, const A& a, const B& b) {
+    ostringstream os;
+    os << a << '_' << b;
+    v->set_value(FD::Convert(os.str()), 1);
+  }
+
+  template <typename A, typename B, typename C>
+  static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c) {
+    ostringstream os;
+    os << a << '_' << b << '_' << c;
+    v->set_value(FD::Convert(os.str()), 1);
+  }
+
+  template <typename A, typename B, typename C, typename D>
+  static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c, const D& d) {
+    ostringstream os;
+    os << a << '_' << b << '_' << c << '_' << d;
+    v->set_value(FD::Convert(os.str()), 1);
+  }
+
+  template <typename A, typename B, typename C, typename D, typename E>
+  static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c, const D& d, const E& e) {
+    ostringstream os;
+    os << a << '_' << b << '_' << c << '_' << d << '_' << e;
+    v->set_value(FD::Convert(os.str()), 1);
   }
 
   void EdgeFeatures(const TaggedSentence& sent,
@@ -19,10 +56,13 @@ struct ArcFFImpl {
                     short m,
                     SparseVector<weight_t>* features) const {
     const bool is_root = (h == -1);
+    int num_words = sent.words.size();
     const string& head_word = (is_root ? kROOT : TD::Convert(sent.words[h]));
     const string& head_pos = (is_root ? kROOT : TD::Convert(sent.pos[h]));
     const string& mod_word = TD::Convert(sent.words[m]);
     const string& mod_pos = TD::Convert(sent.pos[m]);
+    const string& mod_pos_L = (m > 0 ? TD::Convert(sent.pos[m-1]) : kLEFT_POS);
+    const string& mod_pos_R = (m < sent.pos.size() - 1 ? TD::Convert(sent.pos[m]) : kRIGHT_POS);
     const bool dir = m < h;
     int v = m - h;
     if (v < 0) {
@@ -30,42 +70,38 @@ struct ArcFFImpl {
     } else {
       v= int(log(v) / log(2));
     }
-    static map<int, int> lenmap;
-    int& lenfid = lenmap[v];
-    if (!lenfid) {
-      ostringstream os;
-      if (v < 0) os << "LenL" << -v; else os << "LenR" << v;
-      lenfid = FD::Convert(os.str());
-    }
-    features->set_value(lenfid, 1.0);
-    const string& lenstr = FD::Convert(lenfid);
-    if (!is_root) {
-      static int modl = FD::Convert("ModLeft");
-      static int modr = FD::Convert("ModRight");
-      if (dir) features->set_value(modl, 1);
-      else features->set_value(modr, 1);
-    }
+    ostringstream os;
+    if (v < 0) os << "LenL" << -v; else os << "LenR" << v;
+    const string lenstr = os.str();
     if (is_root) {
-      ostringstream os;
-      os << "ROOT:" << mod_pos;
-      features->set_value(FD::Convert(os.str()), 1.0);
-      os << "_" << lenstr;
-      features->set_value(FD::Convert(os.str()), 1.0);
+      Fire(features, "ROOT", mod_pos);
+      Fire(features, "DROOT", mod_pos, lenstr);
+      Fire(features, "LROOT", mod_pos_L);
+      Fire(features, "RROOT", mod_pos_R);
+      Fire(features, "LROOT", mod_pos_L, mod_pos);
+      Fire(features, "RROOT", mod_pos_R, mod_pos);
+      Fire(features, "LDist", m);
+      Fire(features, "RDist", m - num_words);
     } else { // not root
-      ostringstream os;
-      os << "HM:" << head_pos << '_' << mod_pos;
-      features->set_value(FD::Convert(os.str()), 1.0);
-      os << '_' << dir;
-      features->set_value(FD::Convert(os.str()), 1.0);
-      os << '_' << lenstr;
-      features->set_value(FD::Convert(os.str()), 1.0);
-      ostringstream os2;
-      os2 << "LexHM:" << head_word << '_' << mod_word;
-      features->set_value(FD::Convert(os2.str()), 1.0);
-      os2 << '_' << dir;
-      features->set_value(FD::Convert(os2.str()), 1.0);
-      os2 << '_' << lenstr;
-      features->set_value(FD::Convert(os2.str()), 1.0);
+      const string& head_pos_L = (h > 0 ? TD::Convert(sent.pos[h-1]) : kLEFT_POS);
+      const string& head_pos_R = (h < sent.pos.size() - 1 ? TD::Convert(sent.pos[h]) : kRIGHT_POS);
+      Fire(features, lenstr);
+      Fire(features, "H", head_pos);
+      Fire(features, "M", mod_pos);
+      Fire(features, "HM", head_pos, mod_pos);
+      Fire(features, "HM", head_pos, mod_pos, dir);
+      Fire(features, "HM", head_pos, mod_pos, lenstr);
+      Fire(features, "LexH", head_word);
+      Fire(features, "LexM", mod_word);
+      Fire(features, "LexHM", head_word, mod_word);
+      Fire(features, "LexHM", head_word, mod_word, dir);
+      Fire(features, "LexHM", head_word, mod_word, lenstr);
+      // surrounders
+      Fire(features, "posLL", head_pos, mod_pos, head_pos_L, mod_pos_L);
+      Fire(features, "posRR", head_pos, mod_pos, head_pos_R, mod_pos_R);
+      Fire(features, "posLR", head_pos, mod_pos, head_pos_L, mod_pos_R);
+      Fire(features, "posRL", head_pos, mod_pos, head_pos_R, mod_pos_L);
+      Fire(features, "lexRL", head_word, head_pos_L, mod_pos_L);
     }
   }
 };
-- 
cgit v1.2.3