diff options
Diffstat (limited to 'decoder/hg.h')
-rw-r--r-- | decoder/hg.h | 40 |
1 files changed, 28 insertions, 12 deletions
diff --git a/decoder/hg.h b/decoder/hg.h index 0f8d21fd..4b6a6357 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -11,7 +11,7 @@ #include "prob.h" // if you define this, edges_ will be sorted -// (normally, just nodes_ are), but this can be quite +// (normally, just nodes_ are - root must be nodes_.back()), but this can be quite // slow #undef HG_EDGES_TOPO_SORTED @@ -25,12 +25,14 @@ class Hypergraph { typedef SmallVector TailNodeVector; // TODO get rid of cat_? + // TODO keep cat_ and add span and/or state? :) struct Node { - Node() : id_(), cat_() {} + Node() : id_(), cat_(), promise(1) {} int id_; // equal to this object's position in the nodes_ vector WordID cat_; // non-terminal category if <0, 0 if not set std::vector<int> in_edges_; // contents refer to positions in edges_ std::vector<int> out_edges_; // contents refer to positions in edges_ + double promise; // set in global pruning; in [0,infty) so that mean is 1. use: e.g. scale cube poplimit }; // TODO get rid of edge_prob_? (can be computed on the fly as the dot @@ -173,17 +175,30 @@ class Hypergraph { // note: if run_inside_algorithm is false, then consumers may be unhappy if you pruned nodes that are built on by nodes that are kept. void PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorithm = false); - // for density>=1.0, keep this many times the edges needed for the 1best derivation - // if you don't know, use_sum_prod_semiring should be false - void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,const EdgeMask* preserve_mask = NULL,bool safe_inside=false); - /// drop edge i if edge_margin[i] < prune_below, unless preserve_mask[i] void MarginPrune(EdgeProbs const& edge_margin,prob_t prune_below,EdgeMask const* preserve_mask=0,bool safe_inside=false,bool verbose=false); - // safe_inside: if true, a theoretically redundant (but practically important .001% of the time due to rounding error) inside pruning pass will happen after max-marginal pruning. if you don't do this, it's possible that the pruned hypergraph will contain outside-reachable (but not inside-buildable) nodes. that is, a parent will be kept whose children were pruned. if output, those forests may confuse (crash) e.g. mr_vest_map. however, if the hyperedges occur in defined-before-use (all edges with head h occur before h is used as a tail) order, then a grace margin for keeping edges that starts leniently and becomes more forbidding will make it impossible for this to occur, i.e. safe_inside=true is not needed. + // promise[i]=((max_marginal[i]/viterbi)^power).todouble. if normalize, ensure that avg promise is 1. + typedef EdgeProbs NodeProbs; + void SetPromise(NodeProbs const& inside,NodeProbs const& outside, double power=1, bool normalize=true); + + // beam_alpha=0 means don't beam prune, otherwise drop things that are e^beam_alpha times worse than best - // prunes any edge whose prob_t on the best path taking that edge is more than e^alpha times + //density=0 means don't density prune: // for density>=1.0, keep this many times the edges needed for the 1best derivation + // worse than the score of the global best past (or the highest edge posterior) + // scale is for use_sum_prod_semiring (sharpens distribution?) + // promise_power is for a call to SetPromise (no call happens if power=0) + // returns true if density pruning was tighter than beam + // safe_inside would be a redundant anti-rounding error second bottom-up reachability before actually removing edges, to prevent stranded edges. shouldn't be needed - if the hyperedges occur in defined-before-use (all edges with head h occur before h is used as a tail) order, then a grace margin for keeping edges that starts leniently and becomes more forbidding will make it impossible for this to occur, i.e. safe_inside=true is not needed. + bool PruneInsideOutside(double beam_alpha,double density,const EdgeMask* preserve_mask = NULL,const bool use_sum_prod_semiring=false, const double scale=1,double promise_power=0,bool safe_inside=false); + + // legacy: + void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,const EdgeMask* preserve_mask = NULL) { + PruneInsideOutside(density,0,preserve_mask,use_sum_prod_semiring,scale); + } - // prunes any edge whose score on the best path taking that edge is more than alpha away - // from the score of the global best past (or the highest edge posterior) - void BeamPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double alpha,const EdgeMask* preserve_mask = NULL,bool safe_inside=false); + // legacy: + void BeamPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double alpha,const EdgeMask* preserve_mask = NULL) { + PruneInsideOutside(0,alpha,preserve_mask,use_sum_prod_semiring,scale); + } // report nodes, edges, paths std::string stats(std::string const& name="forest") const; @@ -246,8 +261,9 @@ struct ScaledEdgeProb { // see Li (2010), Section 3.2.2-- this is 'x_e = p_e*r_e' struct EdgeFeaturesAndProbWeightFunction { - typedef const SparseVector<prob_t> Weight; - inline const SparseVector<prob_t> operator()(const Hypergraph::Edge& e) const { + typedef SparseVector<prob_t> Weight; + typedef Weight Result; //TODO: change Result->Weight everywhere? + inline const Weight operator()(const Hypergraph::Edge& e) const { SparseVector<prob_t> res; for (SparseVector<double>::const_iterator it = e.feature_values_.begin(); it != e.feature_values_.end(); ++it) |