summaryrefslogtreecommitdiff
path: root/rst_parser/arc_ff.cc
blob: 10885716978fb73d5de09e76a8ce5ecc0cc71771 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include "arc_ff.h"

#include "tdict.h"
#include "fdict.h"
#include "sentence_metadata.h"

using namespace std;

struct ArcFFImpl {
  ArcFFImpl() : kROOT("ROOT") {}
  const string kROOT;

  void PrepareForInput(const TaggedSentence& sentence) {
    (void) sentence;
  }

  void EdgeFeatures(const TaggedSentence& sent,
                    short h,
                    short m,
                    SparseVector<weight_t>* features) const {
    const bool is_root = (h == -1);
    const string& head_word = (is_root ? kROOT : TD::Convert(sent.words[h]));
    const string& head_pos = (is_root ? kROOT : TD::Convert(sent.pos[h]));
    const string& mod_word = TD::Convert(sent.words[m]);
    const string& mod_pos = TD::Convert(sent.pos[m]);
    const bool dir = m < h;
    int v = m - h;
    if (v < 0) {
      v= -1 - int(log(-v) / log(2));
    } else {
      v= int(log(v) / log(2));
    }
    static map<int, int> lenmap;
    int& lenfid = lenmap[v];
    if (!lenfid) {
      ostringstream os;
      if (v < 0) os << "LenL" << -v; else os << "LenR" << v;
      lenfid = FD::Convert(os.str());
    }
    features->set_value(lenfid, 1.0);
    const string& lenstr = FD::Convert(lenfid);
    if (!is_root) {
      static int modl = FD::Convert("ModLeft");
      static int modr = FD::Convert("ModRight");
      if (dir) features->set_value(modl, 1);
      else features->set_value(modr, 1);
    }
    if (is_root) {
      ostringstream os;
      os << "ROOT:" << mod_pos;
      features->set_value(FD::Convert(os.str()), 1.0);
      os << "_" << lenstr;
      features->set_value(FD::Convert(os.str()), 1.0);
    } else { // not root
      ostringstream os;
      os << "HM:" << head_pos << '_' << mod_pos;
      features->set_value(FD::Convert(os.str()), 1.0);
      os << '_' << dir;
      features->set_value(FD::Convert(os.str()), 1.0);
      os << '_' << lenstr;
      features->set_value(FD::Convert(os.str()), 1.0);
      ostringstream os2;
      os2 << "LexHM:" << head_word << '_' << mod_word;
      features->set_value(FD::Convert(os2.str()), 1.0);
      os2 << '_' << dir;
      features->set_value(FD::Convert(os2.str()), 1.0);
      os2 << '_' << lenstr;
      features->set_value(FD::Convert(os2.str()), 1.0);
    }
  }
};

// Allocates the private implementation object with new; the destructor
// releases it with delete.
ArcFeatureFunctions::ArcFeatureFunctions()
    : pimpl(new ArcFFImpl()) {
}
// Releases the implementation object held in pimpl.
ArcFeatureFunctions::~ArcFeatureFunctions() {
  delete this->pimpl;
}

void ArcFeatureFunctions::PrepareForInput(const TaggedSentence& sentence) {
  pimpl->PrepareForInput(sentence);
}

void ArcFeatureFunctions::EdgeFeatures(const TaggedSentence& sentence,
                                       short h,
                                       short m,
                                       SparseVector<weight_t>* features) const {
  pimpl->EdgeFeatures(sentence, h, m, features);
}