1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
cimport grammar
cimport cdec.sa._sa as _sa
import cdec.sa._sa as _sa
def _phrase(phrase):
    """Render a sequence of symbols as a single space-separated string.

    Unicode items are encoded as UTF-8; every other item is converted
    with str().
    """
    tokens = []
    for sym in phrase:
        if isinstance(sym, unicode):
            tokens.append(sym.encode('utf8'))
        else:
            tokens.append(str(sym))
    return ' '.join(tokens)
cdef class NT:
    """A nonterminal symbol: a category name plus an optional reference index.

    Printed as ``[cat]`` when ``ref`` is 0 and as ``[cat,ref]`` otherwise.
    """
    cdef public bytes cat
    cdef public unsigned ref

    def __init__(self, char* cat, unsigned ref=0):
        self.ref = ref
        self.cat = cat

    def __str__(self):
        # ref is unsigned, so "not positive" means exactly zero.
        if self.ref == 0:
            return '[%s]' % self.cat
        return '[%s,%d]' % (self.cat, self.ref)
cdef class NTRef:
    """A reference to a nonterminal by index, printed as ``[ref]``."""
    cdef public unsigned ref

    def __init__(self, unsigned ref):
        self.ref = ref

    def __str__(self):
        return '[%d]' % (self.ref,)
cdef TRule convert_rule(_sa.Rule rule):
    """Build a TRule from an ``_sa.Rule``.

    - The left-hand side is the category of ``rule.lhs``.
    - Score ``i`` of the rule is stored under the name ``PhraseModel_<i>``.
    - Variable symbols become ``NT`` objects on the f side and ``NTRef``
      objects on the e side; other symbols are converted with
      ``_sa.sym_tostring``.
    - Each entry of ``rule.word_alignments`` is unpacked into the pair
      ``(point // 65536, point % 65536)``.
    """
    cdef unsigned i
    cdef lhs = _sa.sym_tocat(rule.lhs)
    cdef scores = {}
    for i in range(rule.n_scores):
        scores['PhraseModel_'+str(i)] = rule.cscores[i]
    f, e = [], []
    cdef int* fsyms = rule.f.syms
    for i in range(rule.f.n):
        if _sa.sym_isvar(fsyms[i]):
            f.append(NT(_sa.sym_tocat(fsyms[i])))
        else:
            f.append(_sa.sym_tostring(fsyms[i]))
    cdef int* esyms = rule.e.syms
    for i in range(rule.e.n):
        if _sa.sym_isvar(esyms[i]):
            e.append(NTRef(_sa.sym_getindex(esyms[i])))
        else:
            e.append(_sa.sym_tostring(esyms[i]))
    # Explicit floor division: plain '/' would produce floats whenever true
    # division is in effect, and alignment indices must stay integers.
    cdef a = [(point // 65536, point % 65536)
              for point in rule.word_alignments]
    return TRule(lhs, f, e, scores, a)
cdef class TRule:
    """Python wrapper around a heap-allocated ``shared_ptr[grammar.TRule]``.

    The ``lhs``, ``f``, ``e``, ``scores`` and ``a`` properties convert between
    Python values and the ``lhs_``, ``f_``, ``e_``, ``scores_`` and ``a_``
    fields of the wrapped C++ rule.
    """
    cdef shared_ptr[grammar.TRule]* rule

    def __init__(self, lhs, f, e, scores, a=None):
        # Allocate a fresh C++ rule, then populate it via the property setters.
        self.rule = new shared_ptr[grammar.TRule](new grammar.TRule())
        self.lhs = lhs
        self.e = e
        self.f = f
        self.scores = scores
        # A missing or empty alignment leaves a_ untouched.
        if a:
            self.a = a
        self.rule.get().ComputeArity()

    def __dealloc__(self):
        del self.rule

    # Value of the wrapped rule's arity_ field (read-only).
    property arity:
        def __get__(self):
            return self.rule.get().arity_

    # Source side. Negative word ids are nonterminals and are returned as NT
    # objects numbered 1, 2, ... in order of appearance; all other ids are
    # returned as unicode strings decoded from UTF-8.
    property f:
        def __get__(self):
            cdef vector[WordID]* f_ = &self.rule.get().f_
            cdef WordID w
            cdef f = []
            cdef unsigned i
            cdef int idx = 0
            for i in range(f_.size()):
                w = f_[0][i]
                if w < 0:
                    idx += 1
                    f.append(NT(TDConvert(-w), idx))
                else:
                    f.append(unicode(TDConvert(w), encoding='utf8'))
            return f

        def __set__(self, f):
            cdef vector[WordID]* f_ = &self.rule.get().f_
            f_.resize(len(f))
            cdef unsigned i
            for i in range(len(f)):
                if isinstance(f[i], NT):
                    # Nonterminals are stored as the negated category id.
                    f_[0][i] = -TDConvert(<char *>f[i].cat)
                else:
                    f_[0][i] = TDConvert(<char *>as_str(f[i]))

    # Target side. NTRef(ref) is stored as 1 - ref, so any stored value
    # below 1 is read back as a nonterminal reference.
    property e:
        def __get__(self):
            cdef vector[WordID]* e_ = &self.rule.get().e_
            cdef WordID w
            cdef e = []
            cdef unsigned i
            for i in range(e_.size()):
                w = e_[0][i]
                if w < 1:
                    e.append(NTRef(1-w))
                else:
                    e.append(unicode(TDConvert(w), encoding='utf8'))
            return e

        def __set__(self, e):
            cdef vector[WordID]* e_ = &self.rule.get().e_
            e_.resize(len(e))
            cdef unsigned i
            for i in range(len(e)):
                if isinstance(e[i], NTRef):
                    e_[0][i] = 1-e[i].ref
                else:
                    e_[0][i] = TDConvert(<char *>as_str(e[i]))

    # Word alignment. The getter is a generator yielding (s_, t_) pairs; the
    # setter expects an indexable sequence of (s, t) integer pairs.
    property a:
        def __get__(self):
            cdef unsigned i
            cdef vector[grammar.AlignmentPoint]* a = &self.rule.get().a_
            for i in range(a.size()):
                yield (a[0][i].s_, a[0][i].t_)

        def __set__(self, a):
            cdef vector[grammar.AlignmentPoint]* a_ = &self.rule.get().a_
            a_.resize(len(a))
            cdef unsigned i
            cdef int s, t
            for i in range(len(a)):
                s, t = a[i]
                a_[0][i] = grammar.AlignmentPoint(s, t)

    # Feature scores. The getter returns a SparseVector holding a copy of the
    # rule's scores_; the setter replaces scores_ from a mapping of feature
    # name -> value and raises KeyError if a name converts to a negative id.
    property scores:
        def __get__(self):
            cdef SparseVector scores = SparseVector.__new__(SparseVector)
            scores.vector = new FastSparseVector[double](self.rule.get().scores_)
            return scores

        def __set__(self, scores):
            cdef FastSparseVector[double]* scores_ = &self.rule.get().scores_
            scores_.clear()
            cdef int fid
            # double, not float: the vector stores doubles, and a float
            # temporary would truncate every value to single precision.
            cdef double fval
            for fname, fval in scores.items():
                fid = FDConvert(<char *>as_str(fname))
                if fid < 0: raise KeyError(fname)
                scores_.set_value(fid, fval)

    # Left-hand side, stored as a negated category id. The setter accepts an
    # NT or a value that NT() can be constructed from.
    property lhs:
        def __get__(self):
            return NT(TDConvert(-self.rule.get().lhs_))

        def __set__(self, lhs):
            if not isinstance(lhs, NT):
                lhs = NT(lhs)
            self.rule.get().lhs_ = -TDConvert(<char *>lhs.cat)

    def __str__(self):
        # Format: "lhs ||| f ||| e ||| name=value ..."
        scores = ' '.join('%s=%s' % feat for feat in self.scores)
        return '%s ||| %s ||| %s ||| %s' % (self.lhs,
                _phrase(self.f), _phrase(self.e), scores)
cdef class MRule(TRule):
    """A TRule whose target side is derived from its right-hand side.

    Every NT in ``rhs`` is replaced on the target side by an NTRef numbered
    1, 2, ... in order of appearance; all other symbols are copied as-is.
    """
    def __init__(self, lhs, rhs, scores, a=None):
        cdef unsigned next_ref = 1
        target = []
        for sym in rhs:
            if not isinstance(sym, NT):
                target.append(sym)
                continue
            target.append(NTRef(next_ref))
            next_ref += 1
        super(MRule, self).__init__(lhs, rhs, target, scores, a)
cdef class Grammar:
    """Wrapper around a heap-allocated ``shared_ptr[grammar.Grammar]``.

    This class never allocates ``self.grammar`` itself; it only releases it
    in ``__dealloc__``. Subclasses such as TextGrammar are expected to
    create it.
    """
    cdef shared_ptr[grammar.Grammar]* grammar

    def __dealloc__(self):
        del self.grammar

    def __iter__(self):
        # Yield one TRule wrapper for each rule in the rule bin returned by
        # the grammar root's GetRules().
        cdef grammar.const_GrammarIter* root = self.grammar.get().GetRoot()
        cdef grammar.const_RuleBin* rbin = root.GetRules()
        cdef TRule trule
        cdef unsigned i
        for i in range(rbin.GetNumRules()):
            # __new__ bypasses TRule.__init__, so no fresh C++ rule is built;
            # the wrapper gets its own shared_ptr made from GetIthRule(i).
            trule = TRule.__new__(TRule)
            trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i))
            yield trule

    # Grammar name, read and written through GetGrammarName/SetGrammarName.
    property name:
        def __get__(self):
            # The original evaluated this expression without returning it,
            # so the property always read as None.
            return self.grammar.get().GetGrammarName().c_str()

        def __set__(self, name):
            self.grammar.get().SetGrammarName(string(<char *>name))
cdef class TextGrammar(Grammar):
    """A Grammar backed by a C++ ``grammar.TextGrammar`` built from rules.

    ``rules`` is an iterable of TRule objects; ``_sa.Rule`` objects are also
    accepted and converted with convert_rule. Any other item raises
    ValueError.
    """
    def __cinit__(self, rules):
        self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar())
        cdef grammar.TextGrammar* text_grammar = <grammar.TextGrammar*> self.grammar.get()
        for entry in rules:
            if isinstance(entry, _sa.Rule):
                converted = convert_rule(entry)
            elif isinstance(entry, TRule):
                converted = entry
            else:
                raise ValueError('the grammar should contain TRule objects')
            text_grammar.AddRule((<TRule> converted).rule[0])
|