summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--rst_parser/arc_ff.cc110
1 files changed, 73 insertions, 37 deletions
diff --git a/rst_parser/arc_ff.cc b/rst_parser/arc_ff.cc
index 10885716..fba36e24 100644
--- a/rst_parser/arc_ff.cc
+++ b/rst_parser/arc_ff.cc
@@ -7,11 +7,48 @@
using namespace std;
struct ArcFFImpl {
- ArcFFImpl() : kROOT("ROOT") {}
+ ArcFFImpl() : kROOT("ROOT"), kLEFT_POS("LEFT"), kRIGHT_POS("RIGHT") {}
const string kROOT;
+ const string kLEFT_POS;
+ const string kRIGHT_POS;
- void PrepareForInput(const TaggedSentence& sentence) {
- (void) sentence;
+ void PrepareForInput(const TaggedSentence& sent) {
+ (void) sent;
+ }
+
+ template <typename A>
+ static void Fire(SparseVector<weight_t>* v, const A& a) {
+ ostringstream os;
+ os << a;
+ v->set_value(FD::Convert(os.str()), 1);
+ }
+
+ template <typename A, typename B>
+ static void Fire(SparseVector<weight_t>* v, const A& a, const B& b) {
+ ostringstream os;
+ os << a << '_' << b;
+ v->set_value(FD::Convert(os.str()), 1);
+ }
+
+ template <typename A, typename B, typename C>
+ static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c) {
+ ostringstream os;
+ os << a << '_' << b << '_' << c;
+ v->set_value(FD::Convert(os.str()), 1);
+ }
+
+ template <typename A, typename B, typename C, typename D>
+ static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c, const D& d) {
+ ostringstream os;
+ os << a << '_' << b << '_' << c << '_' << d;
+ v->set_value(FD::Convert(os.str()), 1);
+ }
+
+ template <typename A, typename B, typename C, typename D, typename E>
+ static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c, const D& d, const E& e) {
+ ostringstream os;
+ os << a << '_' << b << '_' << c << '_' << d << '_' << e;
+ v->set_value(FD::Convert(os.str()), 1);
}
void EdgeFeatures(const TaggedSentence& sent,
@@ -19,10 +56,13 @@ struct ArcFFImpl {
short m,
SparseVector<weight_t>* features) const {
const bool is_root = (h == -1);
+ int num_words = sent.words.size();
const string& head_word = (is_root ? kROOT : TD::Convert(sent.words[h]));
const string& head_pos = (is_root ? kROOT : TD::Convert(sent.pos[h]));
const string& mod_word = TD::Convert(sent.words[m]);
const string& mod_pos = TD::Convert(sent.pos[m]);
+ const string& mod_pos_L = (m > 0 ? TD::Convert(sent.pos[m-1]) : kLEFT_POS);
+ const string& mod_pos_R = (m < sent.pos.size() - 1 ? TD::Convert(sent.pos[m]) : kRIGHT_POS);
const bool dir = m < h;
int v = m - h;
if (v < 0) {
@@ -30,42 +70,38 @@ struct ArcFFImpl {
} else {
v= int(log(v) / log(2));
}
- static map<int, int> lenmap;
- int& lenfid = lenmap[v];
- if (!lenfid) {
- ostringstream os;
- if (v < 0) os << "LenL" << -v; else os << "LenR" << v;
- lenfid = FD::Convert(os.str());
- }
- features->set_value(lenfid, 1.0);
- const string& lenstr = FD::Convert(lenfid);
- if (!is_root) {
- static int modl = FD::Convert("ModLeft");
- static int modr = FD::Convert("ModRight");
- if (dir) features->set_value(modl, 1);
- else features->set_value(modr, 1);
- }
+ ostringstream os;
+ if (v < 0) os << "LenL" << -v; else os << "LenR" << v;
+ const string lenstr = os.str();
if (is_root) {
- ostringstream os;
- os << "ROOT:" << mod_pos;
- features->set_value(FD::Convert(os.str()), 1.0);
- os << "_" << lenstr;
- features->set_value(FD::Convert(os.str()), 1.0);
+ Fire(features, "ROOT", mod_pos);
+ Fire(features, "DROOT", mod_pos, lenstr);
+ Fire(features, "LROOT", mod_pos_L);
+ Fire(features, "RROOT", mod_pos_R);
+ Fire(features, "LROOT", mod_pos_L, mod_pos);
+ Fire(features, "RROOT", mod_pos_R, mod_pos);
+ Fire(features, "LDist", m);
+ Fire(features, "RDist", m - num_words);
} else { // not root
- ostringstream os;
- os << "HM:" << head_pos << '_' << mod_pos;
- features->set_value(FD::Convert(os.str()), 1.0);
- os << '_' << dir;
- features->set_value(FD::Convert(os.str()), 1.0);
- os << '_' << lenstr;
- features->set_value(FD::Convert(os.str()), 1.0);
- ostringstream os2;
- os2 << "LexHM:" << head_word << '_' << mod_word;
- features->set_value(FD::Convert(os2.str()), 1.0);
- os2 << '_' << dir;
- features->set_value(FD::Convert(os2.str()), 1.0);
- os2 << '_' << lenstr;
- features->set_value(FD::Convert(os2.str()), 1.0);
+ const string& head_pos_L = (h > 0 ? TD::Convert(sent.pos[h-1]) : kLEFT_POS);
+ const string& head_pos_R = (h < sent.pos.size() - 1 ? TD::Convert(sent.pos[h]) : kRIGHT_POS);
+ Fire(features, lenstr);
+ Fire(features, "H", head_pos);
+ Fire(features, "M", mod_pos);
+ Fire(features, "HM", head_pos, mod_pos);
+ Fire(features, "HM", head_pos, mod_pos, dir);
+ Fire(features, "HM", head_pos, mod_pos, lenstr);
+ Fire(features, "LexH", head_word);
+ Fire(features, "LexM", mod_word);
+ Fire(features, "LexHM", head_word, mod_word);
+ Fire(features, "LexHM", head_word, mod_word, dir);
+ Fire(features, "LexHM", head_word, mod_word, lenstr);
+ // surrounders
+ Fire(features, "posLL", head_pos, mod_pos, head_pos_L, mod_pos_L);
+ Fire(features, "posRR", head_pos, mod_pos, head_pos_R, mod_pos_R);
+ Fire(features, "posLR", head_pos, mod_pos, head_pos_L, mod_pos_R);
+ Fire(features, "posRL", head_pos, mod_pos, head_pos_R, mod_pos_L);
+ Fire(features, "lexRL", head_word, head_pos_L, mod_pos_L);
}
}
};