summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@cs.cmu.edu>, 2012-04-21 13:33:33 -0400
committer: Chris Dyer <cdyer@cs.cmu.edu>, 2012-04-21 13:33:33 -0400
commit: 06718177056fe5262262e00d98dc89f67cefb193 (patch)
tree: e623fce3ddbb3fdb12a05d65155eaacdbd0af9d6
parent: 1e206220aa506ac0e8eabcfe0cbd0ab851dee262 (diff)
full feature set
-rw-r--r-- rst_parser/arc_ff.cc | 80
1 file changed, 56 insertions, 24 deletions
diff --git a/rst_parser/arc_ff.cc b/rst_parser/arc_ff.cc
index b2a87a7d..c4e5aa17 100644
--- a/rst_parser/arc_ff.cc
+++ b/rst_parser/arc_ff.cc
@@ -3,6 +3,7 @@
#include <iostream>
#include <sstream>
+#include "stringlib.h"
#include "tdict.h"
#include "fdict.h"
#include "sentence_metadata.h"
@@ -66,15 +67,30 @@ struct ArcFFImpl {
v->set_value(FD::Convert(os.str()), 1);
}
+ static void AddConjoin(const SparseVector<double>& v, const string& feat, SparseVector<double>* pf) {  // For each entry of v, sets the feature named "<entry's name>_<feat>" in *pf to that entry's value.
+ for (SparseVector<double>::const_iterator it = v.begin(); it != v.end(); ++it)  // v itself is only read; all writes go to *pf
+ pf->set_value(FD::Convert(FD::Convert(it->first) + "_" + feat), it->second);  // inner Convert maps the entry key to its name, outer Convert maps the conjoined name back to a key (FD is declared outside this view)
+ }
+
+ static inline string Fixup(const string& str) {  // Normalizes a word form: passes it through LowercaseString, then shortens long results to a 5-character prefix plus "*".
+ string res = LowercaseString(str);  // presumably a lowercased copy; LowercaseString is not defined in this view (likely stringlib.h, which this commit starts including) - confirm
+ if (res.size() < 6) return res;  // results of at most 5 characters are returned unchanged
+ return res.substr(0, 5) + "*";  // longer results collapse to their first 5 characters followed by '*'
+ }
+
+ static inline string Suffix(const string& str) {  // Returns the last 3 characters of str, or "" when str has fewer than 4 characters.
+ if (str.size() < 4) return ""; else return str.substr(str.size() - 3);  // NOTE(review): a 3-character str yields "" rather than itself - confirm this is intended
+ }
+
void EdgeFeatures(const TaggedSentence& sent,
short h,
short m,
SparseVector<weight_t>* features) const {
const bool is_root = (h == -1);
+ const string head_word = (is_root ? kROOT : Fixup(TD::Convert(sent.words[h])));
int num_words = sent.words.size();
- const string& head_word = (is_root ? kROOT : TD::Convert(sent.words[h]));
const string& head_pos = (is_root ? kROOT : TD::Convert(sent.pos[h]));
- const string& mod_word = TD::Convert(sent.words[m]);
+ const string mod_word = Fixup(TD::Convert(sent.words[m]));
const string& mod_pos = TD::Convert(sent.pos[m]);
const string& mod_pos_L = (m > 0 ? TD::Convert(sent.pos[m-1]) : kLEFT_POS);
const string& mod_pos_R = (m < sent.pos.size() - 1 ? TD::Convert(sent.pos[m]) : kRIGHT_POS);
@@ -82,42 +98,41 @@ struct ArcFFImpl {
const string dir = (bdir ? "MLeft" : "MRight");
int v = m - h;
if (v < 0) {
- v= -1 - int(log(-v) / log(2));
+ v= -1 - int(log(-v) / log(1.6));
} else {
- v= int(log(v) / log(2));
+ v= int(log(v) / log(1.6)) + 1;
}
ostringstream os;
if (v < 0) os << "LenL" << -v; else os << "LenR" << v;
const string lenstr = os.str();
+ Fire(features, dir);
+ Fire(features, lenstr);
+ // dir, lenstr
if (is_root) {
- Fire(features, "ROOT", mod_pos);
+ Fire(features, "wROOT", mod_word);
+ Fire(features, "pROOT", mod_pos);
+ Fire(features, "wpROOT", mod_word, mod_pos);
Fire(features, "DROOT", mod_pos, lenstr);
Fire(features, "LROOT", mod_pos_L);
Fire(features, "RROOT", mod_pos_R);
Fire(features, "LROOT", mod_pos_L, mod_pos);
Fire(features, "RROOT", mod_pos_R, mod_pos);
Fire(features, "LDist", m);
- Fire(features, "RDist", m - num_words);
+ Fire(features, "RDist", num_words - m);
} else { // not root
const string& head_pos_L = (h > 0 ? TD::Convert(sent.pos[h-1]) : kLEFT_POS);
const string& head_pos_R = (h < sent.pos.size() - 1 ? TD::Convert(sent.pos[h]) : kRIGHT_POS);
- Fire(features, lenstr);
- Fire(features, "H", head_pos);
- Fire(features, "M", mod_pos);
- Fire(features, "HM", head_pos, mod_pos);
- Fire(features, "HM", head_pos, mod_pos, dir);
- Fire(features, "HM", head_pos, mod_pos, lenstr);
- Fire(features, "LexH", head_word);
- Fire(features, "LexM", mod_word);
- Fire(features, "LexHM", head_word, mod_word);
- Fire(features, "LexHM", head_word, mod_word, dir);
- Fire(features, "LexHM", head_word, mod_word, lenstr);
+ SparseVector<double> fv;
+ SparseVector<double>* f = &fv;
+ Fire(f, "H", head_pos);
+ Fire(f, "M", mod_pos);
+ Fire(f, "HM", head_pos, mod_pos);
+
// surrounders
- Fire(features, "posLL", head_pos, mod_pos, head_pos_L, mod_pos_L);
- Fire(features, "posRR", head_pos, mod_pos, head_pos_R, mod_pos_R);
- Fire(features, "posLR", head_pos, mod_pos, head_pos_L, mod_pos_R);
- Fire(features, "posRL", head_pos, mod_pos, head_pos_R, mod_pos_L);
- Fire(features, "lexRL", head_word, head_pos_L, mod_pos_L);
+ Fire(f, "posLL", head_pos, mod_pos, head_pos_L, mod_pos_L);
+ Fire(f, "posRR", head_pos, mod_pos, head_pos_R, mod_pos_R);
+ Fire(f, "posLR", head_pos, mod_pos, head_pos_L, mod_pos_R);
+ Fire(f, "posRL", head_pos, mod_pos, head_pos_R, mod_pos_L);
// between features
int left = min(h,m);
@@ -126,11 +141,28 @@ struct ArcFFImpl {
if (bdir) --right; else ++left;
for (map<WordID, vector<int> >::const_iterator it = pcs.begin(); it != pcs.end(); ++it) {
if (it->second[left] != it->second[right]) {
- Fire(features, "BT", head_pos, TD::Convert(it->first), mod_pos);
- Fire(features, "BT", head_pos, TD::Convert(it->first), mod_pos, dir);
+ Fire(f, "BT", head_pos, TD::Convert(it->first), mod_pos);
}
}
}
+
+ Fire(f, "wH", head_word);
+ Fire(f, "wM", mod_word);
+ Fire(f, "wpH", head_word, head_pos);
+ Fire(f, "wpM", mod_word, mod_pos);
+ Fire(f, "pHwM", head_pos, mod_word);
+ Fire(f, "wHpM", head_word, mod_pos);
+
+ Fire(f, "wHM", head_word, mod_word);
+ Fire(f, "pHMwH", head_pos, mod_pos, head_word);
+ Fire(f, "pHMwM", head_pos, mod_pos, mod_word);
+ Fire(f, "wHMpH", head_word, mod_word, head_pos);
+ Fire(f, "wHMpM", head_word, mod_word, mod_pos);
+ Fire(f, "wHMpHM", head_word, mod_word, head_pos, mod_pos);
+
+ AddConjoin(fv, dir, features);
+ AddConjoin(fv, lenstr, features);
+ (*features) += fv;
}
}
};