1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
#include "ff_ruleshape.h"
#include "fdict.h"
#include <sstream>
using namespace std;
// Reports whether bit number `bit` of `i` is set, where bit 0 is the
// least-significant bit.
inline bool IsBitSet(int i, int bit) {
  return (i & (1 << bit)) != 0;
}
// Maps a boolean to the character used for it in feature names:
// true -> '1', false -> '0'.
inline char BitAsChar(bool bit) {
  if (bit) return '1';
  return '0';
}
// Populates the binary tree rooted at fidtree_ with one feature id per
// (source shape, target shape) pair.
//
// Each shape is a 5-bit pattern, so 32 * 32 leaves are filled in. A leaf is
// reached by following next_[bit] for the five source bits and then the five
// target bits, least-significant bit first. The feature stored at the leaf is
// named "Shape_S<source bits>_T<target bits>", with the bits written in that
// same order, and is registered through FD::Convert.
//
// `param` is not used. The first feature name generated is echoed to cerr as
// an example.
RuleShapeFeatures::RuleShapeFeatures(const string& param) {
  bool example_pending = true;
  for (int src_shape = 0; src_shape < 32; ++src_shape) {
    for (int trg_shape = 0; trg_shape < 32; ++trg_shape) {
      ostringstream name;
      name << "Shape_S";
      Node* leaf = &fidtree_;
      // Levels 0-4 consume the source bits, levels 5-9 the target bits.
      for (int level = 0; level < 10; ++level) {
        if (level == 5) name << "_T";
        const bool bit = (level < 5) ? IsBitSet(src_shape, level)
                                     : IsBitSet(trg_shape, level - 5);
        leaf = &leaf->next_[bit];
        name << BitAsChar(bit);
      }
      const string feature_name = name.str();
      if (example_pending) {
        example_pending = false;
        cerr << " Example feature: " << feature_name << endl;
      }
      leaf->fid_ = FD::Convert(feature_name);
    }
  }
}
// Fires a single indicator feature (value 1.0) describing the "shape" of the
// rule attached to `edge`.
//
// The shape is a 10-slot boolean pattern: slots 0-4 describe the source side
// (rule.f_) and slots 5-9 the target side (rule.e_). Within each side the
// slots alternate between "terminal run" and "nonterminal":
//   * terminal slot:    true if a run of terminals (symbols > 0) starts here;
//                       the whole run is consumed. false if the next symbol
//                       is a nonterminal, which is left for the next slot.
//   * nonterminal slot: true when a nonterminal (symbol < 1) is consumed.
// Unused slots on each side are padded with false. Every slot value is fed to
// Advance() (presumably a step to the matching child of the current tree
// node; its definition is not visible here), and the fid_ of the node reached
// after ten steps is the feature that gets set.
//
// A terminal found in a nonterminal slot prints "BAD RULE" to cerr and
// terminates the process with exit(1). The asserts fire when a side needs
// more than five slots, i.e. for rules with more than two nonterminals.
//
// Only `edge` and `features` are used; the remaining parameters are ignored.
void RuleShapeFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                              const Hypergraph::Edge& edge,
                                              const vector<const void*>& ant_contexts,
                                              SparseVector<double>* features,
                                              SparseVector<double>* estimated_features,
                                              void* context) const {
  TRule& rule = *edge.rule_;
  const Node* node = &fidtree_;
  int slot = 0;  // index of the next shape slot to fill (0..9)

  // ---- source side: slots 0-4, even slots are terminal slots ----
  int idx = 0;
  while (idx < rule.f_.size()) {
    const WordID sym = rule.f_[idx];
    const bool is_terminal = sym > 0;
    if (slot % 2 != 0) {
      // Nonterminal slot.
      if (is_terminal) {
        cerr << "BAD RULE: " << rule.AsString() << endl;
        exit(1);
      }
      node = Advance(node, true);
      ++idx;
      ++slot;
      continue;
    }
    // Terminal slot.
    node = Advance(node, is_terminal);
    if (is_terminal) {
      // Swallow the entire lexical run so it counts as one slot.
      do {
        ++idx;
      } while (idx < rule.f_.size() && rule.f_[idx] > 0);
    }
    ++slot;
  }
  while (slot < 5) {
    node = Advance(node, false);
    ++slot;
  }
  assert(slot == 5);  // fails for rules that are more than binary

  // ---- target side: slots 5-9, odd slots are terminal slots ----
  idx = 0;
  while (idx < rule.e_.size()) {
    const WordID sym = rule.e_[idx];
    const bool is_terminal = sym > 0;
    if (slot % 2 != 1) {
      // Nonterminal slot.
      if (is_terminal) {
        cerr << "BAD RULE: " << rule.AsString() << endl;
        exit(1);
      }
      node = Advance(node, true);
      ++idx;
      ++slot;
      continue;
    }
    // Terminal slot.
    node = Advance(node, is_terminal);
    if (is_terminal) {
      // Swallow the entire lexical run so it counts as one slot.
      do {
        ++idx;
      } while (idx < rule.e_.size() && rule.e_[idx] > 0);
    }
    ++slot;
  }
  while (slot < 10) {
    node = Advance(node, false);
    ++slot;
  }
  assert(slot == 10);  // fails for rules that are more than binary

  features->set_value(node->fid_, 1.0);
}
|