summaryrefslogtreecommitdiff
path: root/decoder/ff_ruleshape.cc
blob: d473704a2e707f37d8ad866f42560d45cf1843a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include "ff_ruleshape.h"

#include "fdict.h"
#include <sstream>

using namespace std;

// Returns true iff bit number `bit` (0 = least significant) is set in `i`.
inline bool IsBitSet(int i, int bit) {
  return (i & (1 << bit)) != 0;
}

// Renders a boolean as the character '1' (true) or '0' (false).
inline char BitAsChar(bool bit) {
  if (bit) return '1';
  return '0';
}

// Populates fidtree_ with one leaf per (source pattern, target pattern) pair.
//
// Each side is a 5-bit pattern, giving 32 x 32 combinations.  For every
// combination the tree is walked from the root through next_[bit], first
// along the five source bits and then along the five target bits (lowest
// bit first on each side).  The node reached after those ten steps gets the
// feature id that FD::Convert returns for the name
//   "Shape_S" <5 source bits> "_T" <5 target bits>.
// The first name generated is echoed to cerr as an example.
//
// `param` is not used by this constructor.
RuleShapeFeatures::RuleShapeFeatures(const string& param) {
  const int kBitsPerSide = 5;
  const int kPatternsPerSide = 32;
  bool example_printed = false;
  for (int src_bits = 0; src_bits < kPatternsPerSide; ++src_bits) {
    for (int trg_bits = 0; trg_bits < kPatternsPerSide; ++trg_bits) {
      ostringstream name;
      Node* node = &fidtree_;
      // side 0 encodes the source pattern, side 1 the target pattern
      for (int side = 0; side < 2; ++side) {
        const int pattern = (side == 0) ? src_bits : trg_bits;
        name << ((side == 0) ? "Shape_S" : "_T");
        for (int b = 0; b < kBitsPerSide; ++b) {
          const bool on = IsBitSet(pattern, b);
          node = &node->next_[on];
          name << BitAsChar(on);
        }
      }
      if (!example_printed) {
        example_printed = true;
        cerr << "  Example feature: " << name.str() << endl;
      }
      node->fid_ = FD::Convert(name.str());
    }
  }
}

// Fires the single shape feature that matches the rule attached to `edge`.
//
// The rule's source side (rule.f_) and then its target side (rule.e_) are
// each reduced to five boolean "slots", and fidtree_ is descended with
// Advance() once per slot.  Slots alternate between
//   - a lexical slot: true if a run of terminals (symbols > 0) starts here,
//     false otherwise; the whole run is consumed by one slot;
//   - a non-terminal slot: true when a symbol < 1 is consumed.
// Slots left over once a side is exhausted are filled with false.  The
// feature id stored in the node reached after all ten steps is set to 1.0
// in `features`.
//
// A terminal encountered in a non-terminal slot is reported on cerr as
// "BAD RULE" and the process exits with status 1.  The asserts check that
// each side used exactly five slots; per the original note they fail for
// rules that are more than binary.
//
// smeta, ant_contexts, estimated_features and context are not used here.
void RuleShapeFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                              const Hypergraph::Edge& edge,
                                              const vector<const void*>& ant_contexts,
                                              SparseVector<double>* features,
                                              SparseVector<double>* estimated_features,
                                              void* context) const {
  const Node* node = &fidtree_;
  TRule& rule = *edge.rule_;
  int slot = 0;  // number of shape slots emitted so far (both sides)

  // ---- source side: slots 0..4, even slots are the lexical ones ----
  const int src_len = static_cast<int>(rule.f_.size());
  for (int idx = 0; idx < src_len; ) {
    const WordID sym = rule.f_[idx];
    const bool is_terminal = (sym > 0);
    const bool lexical_slot = (slot % 2 == 0);
    if (!lexical_slot && is_terminal) {
      // a non-terminal was expected here
      cerr << "BAD RULE: " << rule.AsString() << endl;
      exit(1);
    }
    if (lexical_slot) {
      node = Advance(node, is_terminal);
      if (is_terminal) {
        // swallow the whole lexical string
        do { ++idx; } while (idx < src_len && rule.f_[idx] > 0);
      }
      // when no terminal is present the symbol is left for the next slot
    } else {
      node = Advance(node, true);
      ++idx;
    }
    ++slot;
  }
  while (slot < 5) {
    node = Advance(node, false);
    ++slot;
  }
  assert(slot == 5);  // fails for rules that are more than binary

  // ---- target side: slots 5..9, odd slots are the lexical ones ----
  const int trg_len = static_cast<int>(rule.e_.size());
  for (int idx = 0; idx < trg_len; ) {
    const WordID sym = rule.e_[idx];
    const bool is_terminal = (sym > 0);
    const bool lexical_slot = (slot % 2 == 1);
    if (!lexical_slot && is_terminal) {
      // a non-terminal was expected here
      cerr << "BAD RULE: " << rule.AsString() << endl;
      exit(1);
    }
    if (lexical_slot) {
      node = Advance(node, is_terminal);
      if (is_terminal) {
        // swallow the whole lexical string
        do { ++idx; } while (idx < trg_len && rule.e_[idx] > 0);
      }
    } else {
      node = Advance(node, true);
      ++idx;
    }
    ++slot;
  }
  while (slot < 10) {
    node = Advance(node, false);
    ++slot;
  }
  assert(slot == 10);  // fails for rules that are more than binary

  features->set_value(node->fid_, 1.0);
}