diff options
Diffstat (limited to 'python/cdec/sa/rule.pxi')
-rw-r--r-- | python/cdec/sa/rule.pxi | 191 |
1 files changed, 191 insertions, 0 deletions
# From diff: python/cdec/sa/rule.pxi (new file mode 100644, index 00000000..7fde3e06)
from libc.stdlib cimport malloc, calloc, realloc, free, strtof, strtol
from libc.string cimport strsep, strcpy, strlen


cdef class Phrase:
    """A sequence of integer symbols stored in a C array.

    Symbols for which ``sym_isvar`` is true are treated as variables
    (nonterminals); their positions are recorded in ``varpos``. The
    variables split the phrase into ``n_vars + 1`` "chunks" of
    non-variable symbols (chunk ``k`` lies between variable ``k-1`` and
    variable ``k``).

    NOTE(review): the ``sym_*`` helpers and the attribute declarations
    (``syms``, ``varpos``, ``n``, ``n_vars``) live outside this file.
    """

    def __cinit__(self, words):
        """Copy ``words`` (a sized, indexable sequence of ints) into C storage."""
        cdef int i, j, n, n_vars
        n = len(words)
        n_vars = 0
        self.syms = <int *>malloc(n*sizeof(int))
        # malloc(0) may legitimately return NULL, so NULL is only a failure
        # when a non-empty buffer was requested.
        if self.syms == NULL and n > 0:
            raise MemoryError()
        for i in range(n):
            self.syms[i] = words[i]
            if sym_isvar(self.syms[i]):
                n_vars += 1
        self.n = n
        self.n_vars = n_vars
        self.varpos = <int *>malloc(n_vars*sizeof(int))
        if self.varpos == NULL and n_vars > 0:
            raise MemoryError()
        # Second pass: record the position of each variable, in order.
        j = 0
        for i in range(n):
            if sym_isvar(self.syms[i]):
                self.varpos[j] = i
                j = j + 1

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even if __cinit__ failed early.
        free(self.syms)
        free(self.varpos)

    def __str__(self):
        """Return the symbols rendered with ``sym_tostring``, space-separated."""
        cdef int i
        strs = []
        for i in range(self.n):
            strs.append(sym_tostring(self.syms[i]))
        return ' '.join(strs)

    def handle(self):
        """return a hashable representation that normalizes the ordering
        of the nonterminal indices"""
        cdef int next_index, pos, s
        norm = []
        next_index = 1
        for pos in range(self.n):
            s = self.syms[pos]
            if sym_isvar(s):
                # Renumber variables 1, 2, ... in order of appearance.
                s = sym_setindex(s, next_index)
                next_index = next_index + 1
            norm.append(s)
        return tuple(norm)

    def strhandle(self):
        """Return the same normalized form as ``handle``, but as a
        space-separated string of ``sym_tostring`` renderings."""
        cdef int next_index, pos, s
        norm = []
        next_index = 1
        for pos in range(self.n):
            s = self.syms[pos]
            if sym_isvar(s):
                s = sym_setindex(s, next_index)
                next_index = next_index + 1
            norm.append(sym_tostring(s))
        return ' '.join(norm)

    def arity(self):
        """Return the number of variables in the phrase."""
        return self.n_vars

    def getvarpos(self, i):
        """Return the position of the ``i``-th variable.

        Raises IndexError if ``i`` is not in ``[0, n_vars)``.
        """
        if 0 <= i < self.n_vars:
            return self.varpos[i]
        else:
            raise IndexError

    def getvar(self, i):
        """Return the symbol of the ``i``-th variable.

        Raises IndexError if ``i`` is not in ``[0, n_vars)``.
        """
        if 0 <= i < self.n_vars:
            return self.syms[self.varpos[i]]
        else:
            raise IndexError

    # The two cdef helpers below do NOT validate k; callers must ensure
    # 0 <= k <= n_vars (the def wrappers clen/getchunk do so).
    cdef int chunkpos(self, int k):
        if k == 0:
            return 0
        else:
            return self.varpos[k-1]+1

    cdef int chunklen(self, int k):
        if self.n_vars == 0:
            return self.n
        elif k == 0:
            return self.varpos[0]
        elif k == self.n_vars:
            return self.n-self.varpos[k-1]-1
        else:
            return self.varpos[k]-self.varpos[k-1]-1

    def clen(self, k):
        """Return the length of chunk ``k``.

        Raises IndexError if the phrase has variables and ``k`` is not in
        ``[0, n_vars]``. A phrase without variables keeps the previous
        behaviour of returning its full length for any ``k``.
        """
        # Only reject values that would index varpos out of bounds.
        if self.n_vars != 0 and not (0 <= k <= self.n_vars):
            raise IndexError
        return self.chunklen(k)

    def getchunk(self, ci):
        """Return chunk ``ci`` as a list of symbols.

        Raises IndexError if ``ci`` is not in ``[0, n_vars]``.
        """
        cdef int i, start, stop
        # chunkpos reads varpos[ci-1], so ci must be validated first.
        if not (0 <= ci <= self.n_vars):
            raise IndexError
        start = self.chunkpos(ci)
        stop = start+self.chunklen(ci)
        chunk = []
        for i in range(start, stop):
            chunk.append(self.syms[i])
        return chunk

    def __cmp__(self, other):
        """Lexicographic comparison by symbol value, then by length.

        ``other`` must be a Phrase (a TypeError is raised otherwise).
        """
        cdef Phrase otherp
        cdef int i, common
        otherp = other
        common = min(self.n, otherp.n)
        for i in range(common):
            if self.syms[i] < otherp.syms[i]:
                return -1
            elif self.syms[i] > otherp.syms[i]:
                return 1
        if self.n < otherp.n:
            return -1
        elif self.n > otherp.n:
            return 1
        else:
            return 0

    def __hash__(self):
        """Shift-and-add hash over the absolute values of the symbols."""
        cdef int i
        cdef unsigned h
        h = 0
        for i in range(self.n):
            if self.syms[i] > 0:
                h = (h << 1) + self.syms[i]
            else:
                h = (h << 1) + -self.syms[i]
        return h

    def __len__(self):
        return self.n

    def __getitem__(self, i):
        """Return the symbol at index ``i``.

        Negative indices count from the end. Raises IndexError when the
        index is out of range instead of reading outside the C array.
        """
        cdef Py_ssize_t idx = i
        if idx < 0:
            idx += self.n
        if idx < 0 or idx >= self.n:
            raise IndexError('Phrase index out of range')
        return self.syms[idx]

    def __iter__(self):
        """Yield the symbols in order."""
        cdef int i
        for i in range(self.n):
            yield self.syms[i]

    def subst(self, start, children):
        """Extend the tuple ``start`` with this phrase's symbols, replacing
        each variable by the entry of ``children`` selected by its index.

        ``children[sym_getindex(v)-1]`` is concatenated for a variable
        ``v``; other symbols are appended as 1-tuples. Returns the result.
        """
        cdef int i
        for i in range(self.n):
            if sym_isvar(self.syms[i]):
                start = start + children[sym_getindex(self.syms[i])-1]
            else:
                start = start + (self.syms[i],)
        return start

    property words:
        # String forms of the non-variable symbols, in order.
        def __get__(self):
            return [sym_tostring(w) for w in self if not sym_isvar(w)]


cdef class Rule:
    """A rule with a left-hand-side symbol, two phrases ``f`` and ``e``,
    optional ``scores`` and optional ``word_alignments``."""

    def __cinit__(self, int lhs, Phrase f, Phrase e, scores=None, word_alignments=None):
        """Store the rule fields; raises Exception if ``lhs`` is not a variable."""
        if not sym_isvar(lhs): raise Exception('Invalid LHS symbol: %d' % lhs)
        self.lhs = lhs
        self.f = f
        self.e = e
        self.word_alignments = word_alignments
        self.scores = scores

    def __hash__(self):
        return hash((self.lhs, self.f, self.e))

    def __cmp__(self, Rule other):
        """Compare by (lhs, f, e, word_alignments)."""
        # The right-hand tuple must use other's alignments; comparing
        # self.word_alignments with itself made that field a no-op.
        return cmp((self.lhs, self.f, self.e, self.word_alignments),
                   (other.lhs, other.f, other.e, other.word_alignments))

    def fmerge(self, Phrase f):
        """If ``f`` equals this rule's ``f``, rebind ``self.f`` to the
        passed-in object (so equal phrases can share one instance)."""
        if self.f == f:
            self.f = f

    def arity(self):
        """Return the arity of the ``f`` phrase."""
        return self.f.arity()

    def __str__(self):
        """Return the fields joined by ``' ||| '``; alignments are appended
        as ``i-j`` pairs when present."""
        fields = [sym_tostring(self.lhs), str(self.f), str(self.e), str(self.scores)]
        if self.word_alignments is not None:
            fields.append(' '.join('%d-%d' % a for a in self.alignments()))
        return ' ||| '.join(fields)

    def alignments(self):
        """Yield ``(point // ALIGNMENT_CODE, point % ALIGNMENT_CODE)`` for
        each encoded point in ``word_alignments``."""
        for point in self.word_alignments:
            # Floor division keeps the result an integer regardless of the
            # division semantics in effect.
            yield point // ALIGNMENT_CODE, point % ALIGNMENT_CODE