summaryrefslogtreecommitdiff
path: root/rst_parser/arc_ff.cc
blob: fba36e24c95c9ea6706add05f94d1b28e465453f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#include "arc_ff.h"

#include "tdict.h"
#include "fdict.h"
#include "sentence_metadata.h"

using namespace std;

// Implementation object that ArcFeatureFunctions forwards to. It turns a
// candidate dependency arc (head position h, modifier position m) into sparse
// indicator features whose names are built by joining their parts with '_'.
struct ArcFFImpl {
  ArcFFImpl() : kROOT("ROOT"), kLEFT_POS("LEFT"), kRIGHT_POS("RIGHT") {}
  const string kROOT;       // stands in for the head word/POS when h == -1
  const string kLEFT_POS;   // stands in for the POS left of position 0
  const string kRIGHT_POS;  // stands in for the POS right of the last position

  // Per-sentence hook; nothing is precomputed at the moment.
  void PrepareForInput(const TaggedSentence& sent) {
    (void) sent;  // parameter intentionally unused
  }

  // Fire(v, a, b, ...): streams the arguments into one string separated by
  // '_', maps that string to a feature id with FD::Convert and sets the
  // corresponding entry of *v to 1. One overload per arity (1 to 5 parts).
  template <typename A>
  static void Fire(SparseVector<weight_t>* v, const A& a) {
    ostringstream os;
    os << a;
    v->set_value(FD::Convert(os.str()), 1);
  }

  template <typename A, typename B>
  static void Fire(SparseVector<weight_t>* v, const A& a, const B& b) {
    ostringstream os;
    os << a << '_' << b;
    v->set_value(FD::Convert(os.str()), 1);
  }

  template <typename A, typename B, typename C>
  static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c) {
    ostringstream os;
    os << a << '_' << b << '_' << c;
    v->set_value(FD::Convert(os.str()), 1);
  }

  template <typename A, typename B, typename C, typename D>
  static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c, const D& d) {
    ostringstream os;
    os << a << '_' << b << '_' << c << '_' << d;
    v->set_value(FD::Convert(os.str()), 1);
  }

  template <typename A, typename B, typename C, typename D, typename E>
  static void Fire(SparseVector<weight_t>* v, const A& a, const B& b, const C& c, const D& d, const E& e) {
    ostringstream os;
    os << a << '_' << b << '_' << c << '_' << d << '_' << e;
    v->set_value(FD::Convert(os.str()), 1);
  }

  // Returns floor(log2(x)) for x >= 1 (and 0 for x <= 1) using integer
  // arithmetic only, so exact powers of two cannot be rounded down the way
  // int(log(x) / log(2)) can be.
  static int FloorLog2(int x) {
    int bits = 0;
    while (x > 1) {
      x >>= 1;
      ++bits;
    }
    return bits;
  }

  // Names the log-scale bucket of the signed distance dist = m - h:
  //   dist < 0  -> "LenL<k>" with k = 1 + floor(log2(-dist))
  //   dist > 0  -> "LenR<k>" with k = floor(log2(dist))
  //   dist == 0 -> "LenR0"; a self-arc previously went through log(0), whose
  //                conversion to int is undefined.
  static string LengthBucket(int dist) {
    ostringstream os;
    if (dist < 0)
      os << "LenL" << (1 + FloorLog2(-dist));
    else
      os << "LenR" << FloorLog2(dist);
    return os.str();
  }

  // Sets in *features every indicator feature for the arc h -> m.
  //   sent     - supplies sent.words and sent.pos, both indexed by position
  //   h        - head position, or -1 when the head is the artificial root
  //   m        - modifier position (used directly as an index, so it must be
  //              a valid position in sent)
  //   features - output vector; each fired feature is set to 1
  void EdgeFeatures(const TaggedSentence& sent,
                    short h,
                    short m,
                    SparseVector<weight_t>* features) const {
    const bool is_root = (h == -1);
    const int num_words = sent.words.size();
    // Signed copy of the size so that "index + 1 < num_pos" cannot wrap when
    // sent.pos is empty.
    const int num_pos = sent.pos.size();
    const string& head_word = (is_root ? kROOT : TD::Convert(sent.words[h]));
    const string& head_pos = (is_root ? kROOT : TD::Convert(sent.pos[h]));
    const string& mod_word = TD::Convert(sent.words[m]);
    const string& mod_pos = TD::Convert(sent.pos[m]);
    const string& mod_pos_L = (m > 0 ? TD::Convert(sent.pos[m - 1]) : kLEFT_POS);
    // Right neighbour is position m + 1; reading pos[m] here would simply
    // duplicate mod_pos.
    const string& mod_pos_R = (m + 1 < num_pos ? TD::Convert(sent.pos[m + 1]) : kRIGHT_POS);
    const bool dir = m < h;  // streamed as 1 when the modifier precedes the head, else 0
    const string lenstr = LengthBucket(m - h);
    if (is_root) {
      Fire(features, "ROOT", mod_pos);
      Fire(features, "DROOT", mod_pos, lenstr);
      Fire(features, "LROOT", mod_pos_L);
      Fire(features, "RROOT", mod_pos_R);
      Fire(features, "LROOT", mod_pos_L, mod_pos);
      Fire(features, "RROOT", mod_pos_R, mod_pos);
      Fire(features, "LDist", m);              // offset from the first position
      Fire(features, "RDist", m - num_words);  // negative offset from the end
    } else { // not root
      const string& head_pos_L = (h > 0 ? TD::Convert(sent.pos[h - 1]) : kLEFT_POS);
      // Right neighbour is position h + 1 (same reasoning as mod_pos_R).
      const string& head_pos_R = (h + 1 < num_pos ? TD::Convert(sent.pos[h + 1]) : kRIGHT_POS);
      Fire(features, lenstr);
      // POS-tag features
      Fire(features, "H", head_pos);
      Fire(features, "M", mod_pos);
      Fire(features, "HM", head_pos, mod_pos);
      Fire(features, "HM", head_pos, mod_pos, dir);
      Fire(features, "HM", head_pos, mod_pos, lenstr);
      // lexical features
      Fire(features, "LexH", head_word);
      Fire(features, "LexM", mod_word);
      Fire(features, "LexHM", head_word, mod_word);
      Fire(features, "LexHM", head_word, mod_word, dir);
      Fire(features, "LexHM", head_word, mod_word, lenstr);
      // surrounders
      Fire(features, "posLL", head_pos, mod_pos, head_pos_L, mod_pos_L);
      Fire(features, "posRR", head_pos, mod_pos, head_pos_R, mod_pos_R);
      Fire(features, "posLR", head_pos, mod_pos, head_pos_L, mod_pos_R);
      Fire(features, "posRL", head_pos, mod_pos, head_pos_R, mod_pos_L);
      // NOTE(review): named "lexRL" but built from the two LEFT neighbours;
      // the name is kept so existing feature ids stay stable.
      Fire(features, "lexRL", head_word, head_pos_L, mod_pos_L);
    }
  }
};

// Allocates the ArcFFImpl instance that the member functions forward to.
ArcFeatureFunctions::ArcFeatureFunctions()
    : pimpl(new ArcFFImpl()) {
}
// Frees the ArcFFImpl instance held in pimpl.
ArcFeatureFunctions::~ArcFeatureFunctions() {
  delete pimpl;
}

void ArcFeatureFunctions::PrepareForInput(const TaggedSentence& sentence) {
  pimpl->PrepareForInput(sentence);
}

void ArcFeatureFunctions::EdgeFeatures(const TaggedSentence& sentence,
                                       short h,
                                       short m,
                                       SparseVector<weight_t>* features) const {
  pimpl->EdgeFeatures(sentence, h, m, features);
}