[python] Support for grammars

- Translation rules can now be create programatically - Grammars = list of translation rules can be used for translation - Feature expectations on the hypergraph (inside_outside)
author: Victor Chahuneau <vchahune@cs.cmu.edu> 2012-07-21 01:22:53 -0400
committer: Victor Chahuneau <vchahune@cs.cmu.edu> 2012-07-21 01:22:53 -0400
commit: 06f90d83a1feafad301d365a4a437e44f68be45b (patch)
tree: 24128de1cb5a4767151f9380c46104a26121535d /python/src/grammar.pxi
parent: c4c9c2febd5af552ecddc215758e32b88013fbc7 (diff)
1 files changed, 176 insertions, 0 deletions
diff --git a/python/src/grammar.pxi b/python/src/grammar.pxi
new file mode 100644
index 00000000..80d9fbf5
--- /dev/null
+++ b/python/src/grammar.pxi
@@ -0,0 +1,176 @@
+cimport grammar
+
+def _phrase(phrase):
+    return ' '.join(w.encode('utf8') if isinstance(w, unicode) else str(w) for w in phrase)
+
+cdef class NT:
+    cdef public char* cat
+    cdef public unsigned ref
+    def __init__(self, cat, ref=0):
+        self.cat = cat
+        self.ref = ref
+
+    def __str__(self):
+        if self.ref > 0:
+            return '[%s,%d]' % (self.cat, self.ref)
+        return '[%s]' % self.cat
+
+cdef class NTRef:
+    cdef public unsigned ref
+    def __init__(self, ref):
+        self.ref = ref
+
+    def __str__(self):
+        return '[%d]' % self.ref
+
+cdef class BaseTRule:
+    cdef shared_ptr[grammar.TRule]* rule
+
+    def __dealloc__(self):
+        del self.rule
+
+    property arity:
+        def __get__(self):
+            return self.rule.get().arity_
+
+    property f:
+        def __get__(self):
+            cdef vector[WordID]* f_ = &self.rule.get().f_
+            cdef WordID w
+            cdef f = []
+            cdef unsigned i
+            cdef int idx = 0
+            for i in range(f_.size()):
+                w = f_[0][i]
+                if w < 0:
+                    idx += 1
+                    f.append(NT(TDConvert(-w), idx))
+                else:
+                    f.append(unicode(TDConvert(w), encoding='utf8'))
+            return f
+
+        def __set__(self, f):
+            cdef vector[WordID]* f_ = &self.rule.get().f_
+            f_.resize(len(f))
+            cdef unsigned i
+            cdef int idx = 0
+            for i in range(len(f)):
+                if isinstance(f[i], NT):
+                    f_[0][i] = -TDConvert(<char *>f[i].cat)
+                else:
+                    f_[0][i] = TDConvert(<char *>as_str(f[i]))
+
+    property e:
+        def __get__(self):
+            cdef vector[WordID]* e_ = &self.rule.get().e_
+            cdef WordID w
+            cdef e = []
+            cdef unsigned i
+            cdef int idx = 0
+            for i in range(e_.size()):
+                w = e_[0][i]
+                if w < 1:
+                    idx += 1
+                    e.append(NTRef(1-w))
+                else:
+                    e.append(unicode(TDConvert(w), encoding='utf8'))
+            return e
+
+        def __set__(self, e):
+            cdef vector[WordID]* e_ = &self.rule.get().e_
+            e_.resize(len(e))
+            cdef unsigned i
+            for i in range(len(e)):
+                if isinstance(e[i], NTRef):
+                    e_[0][i] = 1-e[i].ref
+                else:
+                    e_[0][i] = TDConvert(<char *>as_str(e[i]))
+
+    property a:
+        def __get__(self):
+            cdef unsigned i
+            cdef vector[grammar.AlignmentPoint]* a = &self.rule.get().a_
+            for i in range(a.size()):
+                yield (a[0][i].s_, a[0][i].t_)
+
+        def __set__(self, a):
+            cdef vector[grammar.AlignmentPoint]* a_ = &self.rule.get().a_
+            a_.resize(len(a))
+            cdef unsigned i
+            cdef int s, t
+            for i in range(len(a)):
+                s, t = a[i]
+                a_[0][i] = grammar.AlignmentPoint(s, t)
+
+    property scores:
+        def __get__(self):
+            cdef SparseVector scores = SparseVector()
+            scores.vector = new FastSparseVector[double](self.rule.get().scores_)
+            return scores
+
+        def __set__(self, scores):
+            cdef FastSparseVector[double]* scores_ = &self.rule.get().scores_
+            scores_.clear()
+            cdef int fid
+            cdef float fval
+            for fname, fval in scores.items():
+                fid = FDConvert(<char *>as_str(fname))
+                if fid < 0: raise KeyError(fname)
+                scores_.set_value(fid, fval)
+
+    property lhs:
+        def __get__(self):
+            return NT(TDConvert(-self.rule.get().lhs_))
+
+        def __set__(self, lhs):
+            if not isinstance(lhs, NT):
+                lhs = NT(lhs)
+            self.rule.get().lhs_ = -TDConvert(<char *>lhs.cat)
+
+    def __str__(self):
+        scores = ' '.join('%s=%s' % feat for feat in self.scores)
+        return '%s ||| %s ||| %s ||| %s' % (self.lhs,
+                _phrase(self.f), _phrase(self.e), scores)
+
+cdef class TRule(BaseTRule):
+    def __cinit__(self, lhs, f, e, scores, a=None):
+        self.rule = new shared_ptr[grammar.TRule](new grammar.TRule())
+        self.lhs = lhs
+        self.e = e
+        self.f = f
+        self.scores = scores
+        if a:
+            self.a = a
+        self.rule.get().ComputeArity()
+
+cdef class Grammar:
+    cdef shared_ptr[grammar.Grammar]* grammar
+    
+    def __dealloc__(self):
+        del self.grammar
+    
+    def __iter__(self):
+        cdef grammar.GrammarIter* root = self.grammar.get().GetRoot()
+        cdef grammar.RuleBin* rbin = root.GetRules()
+        cdef TRule trule
+        cdef unsigned i
+        for i in range(rbin.GetNumRules()):
+            trule = TRule()
+            trule.rule = new shared_ptr[grammar.TRule](rbin.GetIthRule(i))
+            yield trule
+
+    property name:
+        def __get__(self):
+            self.grammar.get().GetGrammarName().c_str()
+
+        def __set__(self, name):
+            self.grammar.get().SetGrammarName(string(<char *>name))
+
+cdef class TextGrammar(Grammar):
+    def __cinit__(self, rules):
+        self.grammar = new shared_ptr[grammar.Grammar](new grammar.TextGrammar())
+        cdef grammar.TextGrammar* _g = <grammar.TextGrammar*> self.grammar.get()
+        for trule in rules:
+            if not isinstance(trule, BaseTRule):
+                raise ValueError('the grammar should contain TRule objects')
+            _g.AddRule((<BaseTRule> trule).rule[0])
author	Victor Chahuneau <vchahune@cs.cmu.edu>	2012-07-21 01:22:53 -0400
committer	Victor Chahuneau <vchahune@cs.cmu.edu>	2012-07-21 01:22:53 -0400
commit	06f90d83a1feafad301d365a4a437e44f68be45b (patch)
tree	24128de1cb5a4767151f9380c46104a26121535d /python/src/grammar.pxi
parent	c4c9c2febd5af552ecddc215758e32b88013fbc7 (diff)