from libc.stdlib cimport malloc, calloc, realloc, free, strtof, strtol
from libc.string cimport strsep, strcpy, strlen


cdef class Phrase:
    # A sequence of integer symbols held in a C array (`syms`, length `n`).
    # `varpos` holds the positions of the `n_vars` symbols for which
    # sym_isvar() is true, in left-to-right order.

    def __cinit__(self, words):
        # Copy the integer symbols in `words` into a freshly allocated C
        # array and record where the variable symbols sit.
        cdef int i, j, n, n_vars
        n = len(words)
        self.syms = <int *>malloc(n * sizeof(int))
        # malloc(0) may legitimately return NULL, so only a failed non-empty
        # request is treated as out-of-memory.
        if self.syms == NULL and n > 0:
            raise MemoryError()
        n_vars = 0
        for i in range(n):
            self.syms[i] = words[i]
            if sym_isvar(self.syms[i]):
                n_vars += 1
        self.n = n
        self.n_vars = n_vars
        self.varpos = <int *>malloc(n_vars * sizeof(int))
        if self.varpos == NULL and n_vars > 0:
            raise MemoryError()
        j = 0
        for i in range(n):
            if sym_isvar(self.syms[i]):
                self.varpos[j] = i
                j += 1

    def __dealloc__(self):
        # free(NULL) is a no-op, so this is safe even when __cinit__ bailed
        # out before both arrays were allocated.
        free(self.syms)
        free(self.varpos)

    def __str__(self):
        # Space-separated string form of every symbol.
        cdef int i
        parts = []
        for i in range(self.n):
            parts.append(sym_tostring(self.syms[i]))
        return ' '.join(parts)

    def handle(self):
        """return a hashable representation that normalizes the ordering
        of the nonterminal indices"""
        cdef int next_index, pos, s
        norm = []
        next_index = 1
        for pos in range(self.n):
            s = self.syms[pos]
            if sym_isvar(s):
                # Renumber variables 1, 2, ... in order of appearance.
                s = sym_setindex(s, next_index)
                next_index += 1
            norm.append(s)
        return tuple(norm)

    def strhandle(self):
        """Like handle(), but returns the normalized symbols rendered with
        sym_tostring() and joined by single spaces."""
        cdef int next_index, pos, s
        norm = []
        next_index = 1
        for pos in range(self.n):
            s = self.syms[pos]
            if sym_isvar(s):
                s = sym_setindex(s, next_index)
                next_index += 1
            norm.append(sym_tostring(s))
        return ' '.join(norm)

    def arity(self):
        """Return the number of variable symbols in the phrase."""
        return self.n_vars

    def getvarpos(self, i):
        """Return the position of the i-th variable symbol.

        Raises IndexError if i is not in [0, arity())."""
        if not 0 <= i < self.n_vars:
            raise IndexError
        return self.varpos[i]

    def getvar(self, i):
        """Return the i-th variable symbol itself.

        Raises IndexError if i is not in [0, arity())."""
        if not 0 <= i < self.n_vars:
            raise IndexError
        return self.syms[self.varpos[i]]

    cdef int chunkpos(self, int k):
        # Start offset of chunk k: chunk 0 starts at 0, chunk k > 0 starts
        # right after variable k-1. No bounds check is done on k.
        if k == 0:
            return 0
        return self.varpos[k - 1] + 1

    cdef int chunklen(self, int k):
        # Number of symbols strictly between variable k-1 and variable k,
        # with the phrase boundaries standing in at either end.
        # No bounds check is done on k.
        if self.n_vars == 0:
            return self.n
        if k == 0:
            return self.varpos[0]
        if k == self.n_vars:
            return self.n - self.varpos[k - 1] - 1
        return self.varpos[k] - self.varpos[k - 1] - 1

    def clen(self, k):
        """Python-visible wrapper around chunklen(); k is not range-checked."""
        return self.chunklen(k)

    def getchunk(self, ci):
        """Return chunk `ci` (the symbols between two consecutive variables)
        as a list of ints. `ci` is not range-checked."""
        cdef int start, stop, i
        start = self.chunkpos(ci)
        stop = start + self.chunklen(ci)
        chunk = []
        for i in range(start, stop):
            chunk.append(self.syms[i])
        return chunk

    def __cmp__(self, other):
        # Lexicographic comparison on the symbol arrays; when one phrase is
        # a prefix of the other, the shorter one sorts first.
        cdef Phrase otherp
        cdef int i, common
        otherp = other
        common = min(self.n, otherp.n)
        for i in range(common):
            if self.syms[i] < otherp.syms[i]:
                return -1
            elif self.syms[i] > otherp.syms[i]:
                return 1
        if self.n < otherp.n:
            return -1
        elif self.n > otherp.n:
            return 1
        return 0

    def __hash__(self):
        # Shift-and-add over the absolute value of each symbol; the
        # unsigned accumulator wraps around on overflow.
        cdef int i
        cdef unsigned h
        h = 0
        for i in range(self.n):
            if self.syms[i] > 0:
                h = (h << 1) + self.syms[i]
            else:
                h = (h << 1) + -self.syms[i]
        return h

    def __len__(self):
        return self.n

    def __getitem__(self, i):
        # Bounds-checked access: the backing store is a raw C array, so an
        # unchecked index would read arbitrary memory. Negative indices
        # count from the end, as for Python sequences.
        cdef int idx = i
        if idx < 0:
            idx += self.n
        if idx < 0 or idx >= self.n:
            raise IndexError('Phrase index out of range')
        return self.syms[idx]

    def __iter__(self):
        cdef int i
        for i in range(self.n):
            yield self.syms[i]

    def subst(self, start, children):
        """Return `start` extended with this phrase's symbols.

        Each variable symbol is replaced by
        children[sym_getindex(symbol) - 1]; every other symbol is appended
        as a one-element tuple. `start` and the entries of `children` must
        therefore support `+` with tuples."""
        cdef int i
        for i in range(self.n):
            if sym_isvar(self.syms[i]):
                start = start + children[sym_getindex(self.syms[i]) - 1]
            else:
                start = start + (self.syms[i],)
        return start

    property words:
        # String forms of the non-variable symbols, in order.
        def __get__(self):
            return [sym_tostring(w) for w in self if not sym_isvar(w)]


cdef class Rule:
    # A rule with a variable left-hand-side symbol `lhs`, two Phrase sides
    # `f` and `e`, and optional `scores` and `word_alignments`.

    def __cinit__(self, int lhs, Phrase f, Phrase e, scores=None, word_alignments=None):
        # Reject a left-hand side that is not a variable symbol.
        if not sym_isvar(lhs):
            raise Exception('Invalid LHS symbol: %d' % lhs)
        self.lhs = lhs
        self.f = f
        self.e = e
        self.word_alignments = word_alignments
        self.scores = scores

    def __hash__(self):
        # Alignments are deliberately left out: rules that compare equal
        # still hash equal, which is all the hash contract requires.
        return hash((self.lhs, self.f, self.e))

    def __cmp__(self, Rule other):
        # Compare field by field. The right-hand tuple must be built from
        # `other` throughout; using self.word_alignments there would make
        # rules differing only in their alignments compare equal.
        return cmp((self.lhs, self.f, self.e, self.word_alignments),
                   (other.lhs, other.f, other.e, other.word_alignments))

    def fmerge(self, Phrase f):
        """If `f` compares equal to this rule's `f` side, rebind self.f to
        the given object (presumably so equal phrases can share one
        instance -- confirm with callers)."""
        if self.f == f:
            self.f = f

    def arity(self):
        """Return the arity of the rule, taken from its `f` side."""
        return self.f.arity()

    def __str__(self):
        # Fields joined by ' ||| '; the alignment field is emitted only
        # when alignments are present.
        fields = [sym_tostring(self.lhs), str(self.f), str(self.e), str(self.scores)]
        if self.word_alignments is not None:
            fields.append(' '.join('%d-%d' % a for a in self.alignments()))
        return ' ||| '.join(fields)

    def alignments(self):
        """Yield each packed alignment point as a pair
        (point // ALIGNMENT_CODE, point % ALIGNMENT_CODE)."""
        for point in self.word_alignments:
            # Floor division keeps the first component an integer
            # regardless of which division semantics are in effect.
            yield point // ALIGNMENT_CODE, point % ALIGNMENT_CODE