diff options
Diffstat (limited to 'python/src/grammar.pxi')
-rw-r--r-- | python/src/grammar.pxi | 56 |
1 files changed, 35 insertions, 21 deletions
diff --git a/python/src/grammar.pxi b/python/src/grammar.pxi index a9a5ea14..d523e4d2 100644 --- a/python/src/grammar.pxi +++ b/python/src/grammar.pxi @@ -8,7 +8,8 @@ def _phrase(phrase): cdef class NT: cdef public bytes cat cdef public unsigned ref - def __init__(self, char* cat, unsigned ref=0): + def __init__(self, bytes cat, unsigned ref=0): + """NT(bytes cat, int ref=0) -> Non-terminal from category `cat`.""" self.cat = cat self.ref = ref @@ -20,17 +21,15 @@ cdef class NT: cdef class NTRef: cdef public unsigned ref def __init__(self, unsigned ref): + """NTRef(int ref) -> Non-terminal reference.""" self.ref = ref def __str__(self): return '[%d]' % self.ref cdef TRule convert_rule(_sa.Rule rule): - cdef unsigned i - cdef lhs = _sa.sym_tocat(rule.lhs) - cdef scores = {} - for i in range(rule.n_scores): - scores['PhraseModel_'+str(i)] = rule.cscores[i] + lhs = _sa.sym_tocat(rule.lhs) + scores = dict(rule.scores) f, e = [], [] cdef int* fsyms = rule.f.syms for i in range(rule.f.n): @@ -44,13 +43,19 @@ cdef TRule convert_rule(_sa.Rule rule): e.append(NTRef(_sa.sym_getindex(esyms[i]))) else: e.append(_sa.sym_tostring(esyms[i])) - cdef a = [(point/65536, point%65536) for point in rule.word_alignments] + a = list(rule.alignments()) return TRule(lhs, f, e, scores, a) cdef class TRule: cdef shared_ptr[grammar.TRule]* rule def __init__(self, lhs, f, e, scores, a=None): + """TRule(lhs, f, e, scores, a=None) -> Translation rule. + lhs: left hand side non-terminal + f: source phrase (list of words/NT) + e: target phrase (list of words/NTRef) + scores: dictionary of feature scores + a: optional list of alignment points""" self.rule = new shared_ptr[grammar.TRule](new grammar.TRule()) self.lhs = lhs self.e = e @@ -78,9 +83,9 @@ cdef class TRule: w = f_[0][i] if w < 0: idx += 1 - f.append(NT(TDConvert(-w), idx)) + f.append(NT(TDConvert(-w).c_str(), idx)) else: - f.append(unicode(TDConvert(w), encoding='utf8')) + f.append(unicode(TDConvert(w).c_str(), encoding='utf8')) return f def __set__(self, f): @@ -90,9 +95,10 @@ cdef class TRule: cdef int idx = 0 for i in range(len(f)): if isinstance(f[i], NT): - f_[0][i] = -TDConvert(<char *>f[i].cat) + f_[0][i] = -TDConvert((<NT> f[i]).cat) else: - f_[0][i] = TDConvert(<char *>as_str(f[i])) + fi = as_str(f[i]) + f_[0][i] = TDConvert(fi) property e: def __get__(self): @@ -107,7 +113,7 @@ cdef class TRule: idx += 1 e.append(NTRef(1-w)) else: - e.append(unicode(TDConvert(w), encoding='utf8')) + e.append(unicode(TDConvert(w).c_str(), encoding='utf8')) return e def __set__(self, e): @@ -118,7 +124,8 @@ cdef class TRule: if isinstance(e[i], NTRef): e_[0][i] = 1-e[i].ref else: - e_[0][i] = TDConvert(<char *>as_str(e[i])) + ei = as_str(e[i]) + e_[0][i] = TDConvert(ei) property a: def __get__(self): @@ -148,18 +155,19 @@ cdef class TRule: cdef int fid cdef float fval for fname, fval in scores.items(): - fid = FDConvert(<char *>as_str(fname)) + fn = as_str(fname) + fid = FDConvert(fn) if fid < 0: raise KeyError(fname) scores_.set_value(fid, fval) property lhs: def __get__(self): - return NT(TDConvert(-self.rule.get().lhs_)) + return NT(TDConvert(-self.rule.get().lhs_).c_str()) def __set__(self, lhs): if not isinstance(lhs, NT): lhs = NT(lhs) - self.rule.get().lhs_ = -TDConvert(<char *>lhs.cat) + self.rule.get().lhs_ = -TDConvert((<NT> lhs).cat) def __str__(self): scores = ' '.join('%s=%s' % feat for feat in self.scores) @@ -167,7 +175,11 @@ cdef class TRule: _phrase(self.f), _phrase(self.e), scores) cdef class MRule(TRule): - def __init__(self, lhs, rhs, scores, a=None): + def __init__(self, lhs, rhs, scores): + """MRule(lhs, rhs, scores, a=None) -> Monolingual rule. + lhs: left hand side non-terminal + rhs: right hand side phrase (list of words/NT) + scores: dictionary of feature scores""" cdef unsigned i = 1 e = [] for s in rhs: @@ -176,7 +188,7 @@ cdef class MRule(TRule): i += 1 else: e.append(s) - super(MRule, self).__init__(lhs, rhs, e, scores, a) + super(MRule, self).__init__(lhs, rhs, e, scores, None) cdef class Grammar: cdef shared_ptr[grammar.Grammar]* grammar @@ -196,13 +208,15 @@ cdef class Grammar: property name: def __get__(self): - self.grammar.get().GetGrammarName().c_str() + str(self.grammar.get().GetGrammarName().c_str()) def __set__(self, name): - self.grammar.get().SetGrammarName(string(<char *>name)) + name = as_str(name) + self.grammar.get().SetGrammarName(name) cdef class TextGrammar(Grammar): - def __cinit__(self, rules): + def __init__(self, rules): + """TextGrammar(rules) -> SCFG Grammar containing the rules.""" self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar()) cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get() for trule in rules: |