From 600d87af320e790a31f1d902ae48331cd371012b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 17 Apr 2012 23:12:18 -0400 Subject: most features --- rst_parser/arc_ff.cc | 110 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 37 deletions(-) diff --git a/rst_parser/arc_ff.cc b/rst_parser/arc_ff.cc index 10885716..fba36e24 100644 --- a/rst_parser/arc_ff.cc +++ b/rst_parser/arc_ff.cc @@ -7,11 +7,48 @@ using namespace std; struct ArcFFImpl { - ArcFFImpl() : kROOT("ROOT") {} + ArcFFImpl() : kROOT("ROOT"), kLEFT_POS("LEFT"), kRIGHT_POS("RIGHT") {} const string kROOT; + const string kLEFT_POS; + const string kRIGHT_POS; - void PrepareForInput(const TaggedSentence& sentence) { - (void) sentence; + void PrepareForInput(const TaggedSentence& sent) { + (void) sent; + } + + template + static void Fire(SparseVector* v, const A& a) { + ostringstream os; + os << a; + v->set_value(FD::Convert(os.str()), 1); + } + + template + static void Fire(SparseVector* v, const A& a, const B& b) { + ostringstream os; + os << a << '_' << b; + v->set_value(FD::Convert(os.str()), 1); + } + + template + static void Fire(SparseVector* v, const A& a, const B& b, const C& c) { + ostringstream os; + os << a << '_' << b << '_' << c; + v->set_value(FD::Convert(os.str()), 1); + } + + template + static void Fire(SparseVector* v, const A& a, const B& b, const C& c, const D& d) { + ostringstream os; + os << a << '_' << b << '_' << c << '_' << d; + v->set_value(FD::Convert(os.str()), 1); + } + + template + static void Fire(SparseVector* v, const A& a, const B& b, const C& c, const D& d, const E& e) { + ostringstream os; + os << a << '_' << b << '_' << c << '_' << d << '_' << e; + v->set_value(FD::Convert(os.str()), 1); } void EdgeFeatures(const TaggedSentence& sent, @@ -19,10 +56,13 @@ struct ArcFFImpl { short m, SparseVector* features) const { const bool is_root = (h == -1); + int num_words = sent.words.size(); const string& head_word = (is_root ? kROOT : TD::Convert(sent.words[h])); const string& head_pos = (is_root ? kROOT : TD::Convert(sent.pos[h])); const string& mod_word = TD::Convert(sent.words[m]); const string& mod_pos = TD::Convert(sent.pos[m]); + const string& mod_pos_L = (m > 0 ? TD::Convert(sent.pos[m-1]) : kLEFT_POS); + const string& mod_pos_R = (m < sent.pos.size() - 1 ? TD::Convert(sent.pos[m]) : kRIGHT_POS); const bool dir = m < h; int v = m - h; if (v < 0) { @@ -30,42 +70,38 @@ struct ArcFFImpl { } else { v= int(log(v) / log(2)); } - static map lenmap; - int& lenfid = lenmap[v]; - if (!lenfid) { - ostringstream os; - if (v < 0) os << "LenL" << -v; else os << "LenR" << v; - lenfid = FD::Convert(os.str()); - } - features->set_value(lenfid, 1.0); - const string& lenstr = FD::Convert(lenfid); - if (!is_root) { - static int modl = FD::Convert("ModLeft"); - static int modr = FD::Convert("ModRight"); - if (dir) features->set_value(modl, 1); - else features->set_value(modr, 1); - } + ostringstream os; + if (v < 0) os << "LenL" << -v; else os << "LenR" << v; + const string lenstr = os.str(); if (is_root) { - ostringstream os; - os << "ROOT:" << mod_pos; - features->set_value(FD::Convert(os.str()), 1.0); - os << "_" << lenstr; - features->set_value(FD::Convert(os.str()), 1.0); + Fire(features, "ROOT", mod_pos); + Fire(features, "DROOT", mod_pos, lenstr); + Fire(features, "LROOT", mod_pos_L); + Fire(features, "RROOT", mod_pos_R); + Fire(features, "LROOT", mod_pos_L, mod_pos); + Fire(features, "RROOT", mod_pos_R, mod_pos); + Fire(features, "LDist", m); + Fire(features, "RDist", m - num_words); } else { // not root - ostringstream os; - os << "HM:" << head_pos << '_' << mod_pos; - features->set_value(FD::Convert(os.str()), 1.0); - os << '_' << dir; - features->set_value(FD::Convert(os.str()), 1.0); - os << '_' << lenstr; - features->set_value(FD::Convert(os.str()), 1.0); - ostringstream os2; - os2 << "LexHM:" << head_word << '_' << mod_word; - features->set_value(FD::Convert(os2.str()), 1.0); - os2 << '_' << dir; - features->set_value(FD::Convert(os2.str()), 1.0); - os2 << '_' << lenstr; - features->set_value(FD::Convert(os2.str()), 1.0); + const string& head_pos_L = (h > 0 ? TD::Convert(sent.pos[h-1]) : kLEFT_POS); + const string& head_pos_R = (h < sent.pos.size() - 1 ? TD::Convert(sent.pos[h]) : kRIGHT_POS); + Fire(features, lenstr); + Fire(features, "H", head_pos); + Fire(features, "M", mod_pos); + Fire(features, "HM", head_pos, mod_pos); + Fire(features, "HM", head_pos, mod_pos, dir); + Fire(features, "HM", head_pos, mod_pos, lenstr); + Fire(features, "LexH", head_word); + Fire(features, "LexM", mod_word); + Fire(features, "LexHM", head_word, mod_word); + Fire(features, "LexHM", head_word, mod_word, dir); + Fire(features, "LexHM", head_word, mod_word, lenstr); + // surrounders + Fire(features, "posLL", head_pos, mod_pos, head_pos_L, mod_pos_L); + Fire(features, "posRR", head_pos, mod_pos, head_pos_R, mod_pos_R); + Fire(features, "posLR", head_pos, mod_pos, head_pos_L, mod_pos_R); + Fire(features, "posRL", head_pos, mod_pos, head_pos_R, mod_pos_L); + Fire(features, "lexRL", head_word, head_pos_L, mod_pos_L); } } }; -- cgit v1.2.3