summaryrefslogtreecommitdiff
path: root/python/src/hypergraph.pxi
diff options
context:
space:
mode:
authorVictor Chahuneau <vchahune@cs.cmu.edu>2013-08-26 20:12:32 -0400
committerVictor Chahuneau <vchahune@cs.cmu.edu>2013-08-26 20:12:32 -0400
commit03799a2d330c6dbbe12154d4bcea236210b4f6ed (patch)
tree7adb0bc8dd2987fa32ee1299d8821dd8b7b06706 /python/src/hypergraph.pxi
parent8b491e57f8a011f4f8496e44bed7eb7a4360bc93 (diff)
Improve the package structure of pycdec
This change should not break anything, but now you can run: python setup.py build_ext --inplace and use the cleaner: PYTHONPATH=/path/to/cdec/python python -m ...
Diffstat (limited to 'python/src/hypergraph.pxi')
-rw-r--r--python/src/hypergraph.pxi281
1 files changed, 0 insertions, 281 deletions
diff --git a/python/src/hypergraph.pxi b/python/src/hypergraph.pxi
deleted file mode 100644
index 4a709d32..00000000
--- a/python/src/hypergraph.pxi
+++ /dev/null
@@ -1,281 +0,0 @@
-cimport hypergraph
-cimport kbest
-
-cdef class Hypergraph:
- cdef hypergraph.Hypergraph* hg
- cdef MT19937* rng
-
- def __dealloc__(self):
- del self.hg
- if self.rng != NULL:
- del self.rng
-
- cdef MT19937* _rng(self):
- if self.rng == NULL:
- self.rng = new MT19937()
- return self.rng
-
- def viterbi(self):
- """hg.viterbi() -> String for the best hypothesis in the hypergraph."""
- cdef vector[WordID] trans
- hypergraph.ViterbiESentence(self.hg[0], &trans)
- return unicode(GetString(trans).c_str(), 'utf8')
-
- def viterbi_trees(self):
- """hg.viterbi_trees() -> (f_tree, e_tree)
- f_tree: Source tree for the best hypothesis in the hypergraph.
- e_tree: Target tree for the best hypothesis in the hypergraph.
- """
- f_tree = unicode(hypergraph.ViterbiFTree(self.hg[0]).c_str(), 'utf8')
- e_tree = unicode(hypergraph.ViterbiETree(self.hg[0]).c_str(), 'utf8')
- return (f_tree, e_tree)
-
- def viterbi_features(self):
- """hg.viterbi_features() -> SparseVector with the features corresponding
- to the best derivation in the hypergraph."""
- cdef SparseVector fmap = SparseVector.__new__(SparseVector)
- fmap.vector = new FastSparseVector[weight_t](hypergraph.ViterbiFeatures(self.hg[0]))
- return fmap
-
- def viterbi_forest(self):
- cdef Hypergraph hg = Hypergraph()
- hg.hg = new hypergraph.Hypergraph(self.hg[0].CreateViterbiHypergraph(NULL).get()[0])
- return hg
-
- def viterbi_joshua(self):
- """hg.viterbi_joshua() -> Joshua representation of the best derivation."""
- return unicode(hypergraph.JoshuaVisualizationString(self.hg[0]).c_str(), 'utf8')
-
- def kbest(self, size):
- """hg.kbest(size) -> List of k-best hypotheses in the hypergraph."""
- cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal]* derivations = new kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal](self.hg[0], size)
- cdef kbest.KBestDerivations[vector[WordID], kbest.ESentenceTraversal].Derivation* derivation
- cdef unsigned k
- try:
- for k in range(size):
- derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
- if not derivation: break
- yield unicode(GetString(derivation._yield).c_str(), 'utf8')
- finally:
- del derivations
-
- def kbest_trees(self, size):
- """hg.kbest_trees(size) -> List of k-best trees in the hypergraph."""
- cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal]* f_derivations = new kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal](self.hg[0], size)
- cdef kbest.KBestDerivations[vector[WordID], kbest.FTreeTraversal].Derivation* f_derivation
- cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal]* e_derivations = new kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal](self.hg[0], size)
- cdef kbest.KBestDerivations[vector[WordID], kbest.ETreeTraversal].Derivation* e_derivation
- cdef unsigned k
- try:
- for k in range(size):
- f_derivation = f_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
- e_derivation = e_derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
- if not f_derivation or not e_derivation: break
- f_tree = unicode(GetString(f_derivation._yield).c_str(), 'utf8')
- e_tree = unicode(GetString(e_derivation._yield).c_str(), 'utf8')
- yield (f_tree, e_tree)
- finally:
- del f_derivations
- del e_derivations
-
- def kbest_features(self, size):
- """hg.kbest_trees(size) -> List of k-best feature vectors in the hypergraph."""
- cdef kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal]* derivations = new kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal](self.hg[0], size)
- cdef kbest.KBestDerivations[FastSparseVector[weight_t], kbest.FeatureVectorTraversal].Derivation* derivation
- cdef SparseVector fmap
- cdef unsigned k
- try:
- for k in range(size):
- derivation = derivations.LazyKthBest(self.hg.nodes_.size() - 1, k)
- if not derivation: break
- fmap = SparseVector.__new__(SparseVector)
- fmap.vector = new FastSparseVector[weight_t](derivation._yield)
- yield fmap
- finally:
- del derivations
-
- def sample(self, unsigned n):
- """hg.sample(n) -> Sample of n hypotheses from the hypergraph."""
- cdef vector[hypergraph.Hypothesis]* hypos = new vector[hypergraph.Hypothesis]()
- hypergraph.sample_hypotheses(self.hg[0], n, self._rng(), hypos)
- cdef unsigned k
- try:
- for k in range(hypos.size()):
- yield unicode(GetString(hypos[0][k].words).c_str(), 'utf8')
- finally:
- del hypos
-
- def sample_trees(self, unsigned n):
- """hg.sample_trees(n) -> Sample of n trees from the hypergraph."""
- cdef vector[string]* trees = new vector[string]()
- hypergraph.sample_trees(self.hg[0], n, self._rng(), trees)
- cdef unsigned k
- try:
- for k in range(trees.size()):
- yield unicode(trees[0][k].c_str(), 'utf8')
- finally:
- del trees
-
- def intersect(self, inp):
- """hg.intersect(Lattice/string): Intersect the hypergraph with the provided reference."""
- cdef Lattice lat
- if isinstance(inp, Lattice):
- lat = <Lattice> inp
- elif isinstance(inp, basestring):
- lat = Lattice(inp)
- else:
- raise TypeError('cannot intersect hypergraph with %s' % type(inp))
- return hypergraph.Intersect(lat.lattice[0], self.hg)
-
- def prune(self, beam_alpha=0, density=0, **kwargs):
- """hg.prune(beam_alpha=0, density=0): Prune the hypergraph.
- beam_alpha: use beam pruning
- density: use density pruning"""
- cdef hypergraph.EdgeMask* preserve_mask = NULL
- if 'csplit_preserve_full_word' in kwargs:
- preserve_mask = new hypergraph.EdgeMask(self.hg.edges_.size())
- preserve_mask[0][hypergraph.GetFullWordEdgeIndex(self.hg[0])] = True
- self.hg.PruneInsideOutside(beam_alpha, density, preserve_mask, False, 1, False)
- if preserve_mask:
- del preserve_mask
-
- def lattice(self): # TODO direct hg -> lattice conversion in cdec
- """hg.lattice() -> Lattice corresponding to the hypergraph."""
- cdef bytes plf = hypergraph.AsPLF(self.hg[0], True).c_str()
- return Lattice(eval(plf))
-
- def plf(self):
- """hg.plf() -> Lattice PLF representation corresponding to the hypergraph."""
- return bytes(hypergraph.AsPLF(self.hg[0], True).c_str())
-
- def reweight(self, weights):
- """hg.reweight(SparseVector/DenseVector): Reweight the hypergraph with a new vector."""
- if isinstance(weights, SparseVector):
- self.hg.Reweight((<SparseVector> weights).vector[0])
- elif isinstance(weights, DenseVector):
- self.hg.Reweight((<DenseVector> weights).vector[0])
- else:
- raise TypeError('cannot reweight hypergraph with %s' % type(weights))
-
- property edges:
- def __get__(self):
- cdef unsigned i
- for i in range(self.hg.edges_.size()):
- yield HypergraphEdge().init(self.hg, i)
-
- property nodes:
- def __get__(self):
- cdef unsigned i
- for i in range(self.hg.nodes_.size()):
- yield HypergraphNode().init(self.hg, i)
-
- property goal:
- def __get__(self):
- return HypergraphNode().init(self.hg, self.hg.GoalNode())
-
- property npaths:
- def __get__(self):
- return self.hg.NumberOfPaths()
-
- def inside_outside(self):
- """hg.inside_outside() -> SparseVector with inside-outside scores for each feature."""
- cdef FastSparseVector[prob_t]* result = new FastSparseVector[prob_t]()
- cdef prob_t z = hypergraph.InsideOutside(self.hg[0], result)
- result[0] /= z
- cdef SparseVector vector = SparseVector.__new__(SparseVector)
- vector.vector = new FastSparseVector[double]()
- cdef FastSparseVector[prob_t].const_iterator* it = new FastSparseVector[prob_t].const_iterator(result[0], False)
- cdef unsigned i
- for i in range(result.size()):
- vector.vector.set_value(it[0].ptr().first, log(it[0].ptr().second))
- pinc(it[0]) # ++it
- del it
- del result
- return vector
-
-cdef class HypergraphEdge:
- cdef hypergraph.Hypergraph* hg
- cdef hypergraph.HypergraphEdge* edge
- cdef public TRule trule
-
- cdef init(self, hypergraph.Hypergraph* hg, unsigned i):
- self.hg = hg
- self.edge = &hg.edges_[i]
- self.trule = TRule.__new__(TRule)
- self.trule.rule = new shared_ptr[grammar.TRule](self.edge.rule_)
- return self
-
- def __len__(self):
- return self.edge.tail_nodes_.size()
-
- property head_node:
- def __get__(self):
- return HypergraphNode().init(self.hg, self.edge.head_node_)
-
- property tail_nodes:
- def __get__(self):
- cdef unsigned i
- for i in range(self.edge.tail_nodes_.size()):
- yield HypergraphNode().init(self.hg, self.edge.tail_nodes_[i])
-
- property span:
- def __get__(self):
- return (self.edge.i_, self.edge.j_)
-
- property src_span:
- def __get__(self):
- return (self.edge.prev_i_, self.edge.prev_j_)
-
- property feature_values:
- def __get__(self):
- cdef SparseVector vector = SparseVector.__new__(SparseVector)
- vector.vector = new FastSparseVector[double](self.edge.feature_values_)
- return vector
-
- property prob:
- def __get__(self):
- return self.edge.edge_prob_.as_float()
-
- def __richcmp__(HypergraphEdge x, HypergraphEdge y, int op):
- if op == 2: # ==
- return x.edge == y.edge
- elif op == 3: # !=
- return not (x == y)
- raise NotImplemented('comparison not implemented for HypergraphEdge')
-
-cdef class HypergraphNode:
- cdef hypergraph.Hypergraph* hg
- cdef hypergraph.HypergraphNode* node
-
- cdef init(self, hypergraph.Hypergraph* hg, unsigned i):
- self.hg = hg
- self.node = &hg.nodes_[i]
- return self
-
- property in_edges:
- def __get__(self):
- cdef unsigned i
- for i in range(self.node.in_edges_.size()):
- yield HypergraphEdge().init(self.hg, self.node.in_edges_[i])
-
- property out_edges:
- def __get__(self):
- cdef unsigned i
- for i in range(self.node.out_edges_.size()):
- yield HypergraphEdge().init(self.hg, self.node.out_edges_[i])
-
- property span:
- def __get__(self):
- return next(self.in_edges).span
-
- property cat:
- def __get__(self):
- if self.node.cat_:
- return str(TDConvert(-self.node.cat_).c_str())
-
- def __richcmp__(HypergraphNode x, HypergraphNode y, int op):
- if op == 2: # ==
- return x.node == y.node
- elif op == 3: # !=
- return not (x == y)
- raise NotImplemented('comparison not implemented for HypergraphNode')