summary | refs | log | tree | commit | diff
path: root/python/src/grammar.pxi
diff options
context:
space:
mode:
Diffstat (limited to 'python/src/grammar.pxi')
-rw-r--r-- python/src/grammar.pxi 56
1 files changed, 35 insertions, 21 deletions
diff --git a/python/src/grammar.pxi b/python/src/grammar.pxi
index a9a5ea14..d523e4d2 100644
--- a/python/src/grammar.pxi
+++ b/python/src/grammar.pxi
@@ -8,7 +8,8 @@ def _phrase(phrase):
cdef class NT:
cdef public bytes cat
cdef public unsigned ref
- def __init__(self, char* cat, unsigned ref=0):
+ def __init__(self, bytes cat, unsigned ref=0):
+ """NT(bytes cat, int ref=0) -> Non-terminal from category `cat`."""
self.cat = cat
self.ref = ref
@@ -20,17 +21,15 @@ cdef class NT:
cdef class NTRef:
cdef public unsigned ref
def __init__(self, unsigned ref):
+ """NTRef(int ref) -> Non-terminal reference."""
self.ref = ref
def __str__(self):
return '[%d]' % self.ref
cdef TRule convert_rule(_sa.Rule rule):
- cdef unsigned i
- cdef lhs = _sa.sym_tocat(rule.lhs)
- cdef scores = {}
- for i in range(rule.n_scores):
- scores['PhraseModel_'+str(i)] = rule.cscores[i]
+ lhs = _sa.sym_tocat(rule.lhs)
+ scores = dict(rule.scores)
f, e = [], []
cdef int* fsyms = rule.f.syms
for i in range(rule.f.n):
@@ -44,13 +43,19 @@ cdef TRule convert_rule(_sa.Rule rule):
e.append(NTRef(_sa.sym_getindex(esyms[i])))
else:
e.append(_sa.sym_tostring(esyms[i]))
- cdef a = [(point/65536, point%65536) for point in rule.word_alignments]
+ a = list(rule.alignments())
return TRule(lhs, f, e, scores, a)
cdef class TRule:
cdef shared_ptr[grammar.TRule]* rule
def __init__(self, lhs, f, e, scores, a=None):
+ """TRule(lhs, f, e, scores, a=None) -> Translation rule.
+ lhs: left hand side non-terminal
+ f: source phrase (list of words/NT)
+ e: target phrase (list of words/NTRef)
+ scores: dictionary of feature scores
+ a: optional list of alignment points"""
self.rule = new shared_ptr[grammar.TRule](new grammar.TRule())
self.lhs = lhs
self.e = e
@@ -78,9 +83,9 @@ cdef class TRule:
w = f_[0][i]
if w < 0:
idx += 1
- f.append(NT(TDConvert(-w), idx))
+ f.append(NT(TDConvert(-w).c_str(), idx))
else:
- f.append(unicode(TDConvert(w), encoding='utf8'))
+ f.append(unicode(TDConvert(w).c_str(), encoding='utf8'))
return f
def __set__(self, f):
@@ -90,9 +95,10 @@ cdef class TRule:
cdef int idx = 0
for i in range(len(f)):
if isinstance(f[i], NT):
- f_[0][i] = -TDConvert(<char *>f[i].cat)
+ f_[0][i] = -TDConvert((<NT> f[i]).cat)
else:
- f_[0][i] = TDConvert(<char *>as_str(f[i]))
+ fi = as_str(f[i])
+ f_[0][i] = TDConvert(fi)
property e:
def __get__(self):
@@ -107,7 +113,7 @@ cdef class TRule:
idx += 1
e.append(NTRef(1-w))
else:
- e.append(unicode(TDConvert(w), encoding='utf8'))
+ e.append(unicode(TDConvert(w).c_str(), encoding='utf8'))
return e
def __set__(self, e):
@@ -118,7 +124,8 @@ cdef class TRule:
if isinstance(e[i], NTRef):
e_[0][i] = 1-e[i].ref
else:
- e_[0][i] = TDConvert(<char *>as_str(e[i]))
+ ei = as_str(e[i])
+ e_[0][i] = TDConvert(ei)
property a:
def __get__(self):
@@ -148,18 +155,19 @@ cdef class TRule:
cdef int fid
cdef float fval
for fname, fval in scores.items():
- fid = FDConvert(<char *>as_str(fname))
+ fn = as_str(fname)
+ fid = FDConvert(fn)
if fid < 0: raise KeyError(fname)
scores_.set_value(fid, fval)
property lhs:
def __get__(self):
- return NT(TDConvert(-self.rule.get().lhs_))
+ return NT(TDConvert(-self.rule.get().lhs_).c_str())
def __set__(self, lhs):
if not isinstance(lhs, NT):
lhs = NT(lhs)
- self.rule.get().lhs_ = -TDConvert(<char *>lhs.cat)
+ self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat)
def __str__(self):
scores = ' '.join('%s=%s' % feat for feat in self.scores)
@@ -167,7 +175,11 @@ cdef class TRule:
_phrase(self.f), _phrase(self.e), scores)
cdef class MRule(TRule):
- def __init__(self, lhs, rhs, scores, a=None):
+ def __init__(self, lhs, rhs, scores):
+ """MRule(lhs, rhs, scores) -> Monolingual rule.
+ lhs: left hand side non-terminal
+ rhs: right hand side phrase (list of words/NT)
+ scores: dictionary of feature scores"""
cdef unsigned i = 1
e = []
for s in rhs:
@@ -176,7 +188,7 @@ cdef class MRule(TRule):
i += 1
else:
e.append(s)
- super(MRule, self).__init__(lhs, rhs, e, scores, a)
+ super(MRule, self).__init__(lhs, rhs, e, scores, None)
cdef class Grammar:
cdef shared_ptr[grammar.Grammar]* grammar
@@ -196,13 +208,15 @@ cdef class Grammar:
property name:
def __get__(self):
- self.grammar.get().GetGrammarName().c_str()
+ return str(self.grammar.get().GetGrammarName().c_str())  # must return the name; without `return` the getter always yields None
def __set__(self, name):
- self.grammar.get().SetGrammarName(string(<char *>name))
+ name = as_str(name)
+ self.grammar.get().SetGrammarName(name)
cdef class TextGrammar(Grammar):
- def __cinit__(self, rules):
+ def __init__(self, rules):
+ """TextGrammar(rules) -> SCFG Grammar containing the rules."""
self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar())
cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get()
for trule in rules: