summaryrefslogtreecommitdiff
path: root/decoder/hg.h
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-12 03:42:39 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-12 03:42:39 +0000
commit7220bd3685bb7cbd6a68749f55314892af5605c9 (patch)
tree524621fbdb2a2a3a4bfceb82bca72172f12412d1 /decoder/hg.h
parent1cf86da06f6a0ab5d9a78bd39a1627df142ea8c5 (diff)
inebriated, but sped up inside/outside pruning and made cube poplimit scale with promise of nodes from previous global pruning, if --promise_power=N for N>0.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@219 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/hg.h')
-rw-r--r--decoder/hg.h40
1 files changed, 28 insertions, 12 deletions
diff --git a/decoder/hg.h b/decoder/hg.h
index 0f8d21fd..4b6a6357 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -11,7 +11,7 @@
#include "prob.h"
// if you define this, edges_ will be sorted
-// (normally, just nodes_ are), but this can be quite
+// (normally, just nodes_ are - root must be nodes_.back()), but this can be quite
// slow
#undef HG_EDGES_TOPO_SORTED
@@ -25,12 +25,14 @@ class Hypergraph {
typedef SmallVector TailNodeVector;
// TODO get rid of cat_?
+ // TODO keep cat_ and add span and/or state? :)
struct Node {
- Node() : id_(), cat_() {}
+ Node() : id_(), cat_(), promise(1) {}
int id_; // equal to this object's position in the nodes_ vector
WordID cat_; // non-terminal category if <0, 0 if not set
std::vector<int> in_edges_; // contents refer to positions in edges_
std::vector<int> out_edges_; // contents refer to positions in edges_
+ double promise; // set in global pruning; in [0,infty) so that mean is 1. use: e.g. scale cube poplimit
};
// TODO get rid of edge_prob_? (can be computed on the fly as the dot
@@ -173,17 +175,30 @@ class Hypergraph {
// note: if run_inside_algorithm is false, then consumers may be unhappy if you pruned nodes that are built on by nodes that are kept.
void PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorithm = false);
- // for density>=1.0, keep this many times the edges needed for the 1best derivation
- // if you don't know, use_sum_prod_semiring should be false
- void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,const EdgeMask* preserve_mask = NULL,bool safe_inside=false);
-
/// drop edge i if edge_margin[i] < prune_below, unless preserve_mask[i]
void MarginPrune(EdgeProbs const& edge_margin,prob_t prune_below,EdgeMask const* preserve_mask=0,bool safe_inside=false,bool verbose=false);
- // safe_inside: if true, a theoretically redundant (but practically important .001% of the time due to rounding error) inside pruning pass will happen after max-marginal pruning. if you don't do this, it's possible that the pruned hypergraph will contain outside-reachable (but not inside-buildable) nodes. that is, a parent will be kept whose children were pruned. if output, those forests may confuse (crash) e.g. mr_vest_map. however, if the hyperedges occur in defined-before-use (all edges with head h occur before h is used as a tail) order, then a grace margin for keeping edges that starts leniently and becomes more forbidding will make it impossible for this to occur, i.e. safe_inside=true is not needed.
+ // promise[i]=((max_marginal[i]/viterbi)^power).todouble. if normalize, ensure that avg promise is 1.
+ typedef EdgeProbs NodeProbs;
+ void SetPromise(NodeProbs const& inside,NodeProbs const& outside, double power=1, bool normalize=true);
+
+ // beam_alpha=0 means don't beam prune, otherwise drop things that are e^beam_alpha times worse than best - // prunes any edge whose prob_t on the best path taking that edge is more than e^alpha times
+ //density=0 means don't density prune: // for density>=1.0, keep this many times the edges needed for the 1best derivation
+ // worse than the score of the global best past (or the highest edge posterior)
+ // scale is for use_sum_prod_semiring (sharpens distribution?)
+ // promise_power is for a call to SetPromise (no call happens if power=0)
+ // returns true if density pruning was tighter than beam
+ // safe_inside would be a redundant anti-rounding error second bottom-up reachability before actually removing edges, to prevent stranded edges. shouldn't be needed - if the hyperedges occur in defined-before-use (all edges with head h occur before h is used as a tail) order, then a grace margin for keeping edges that starts leniently and becomes more forbidding will make it impossible for this to occur, i.e. safe_inside=true is not needed.
+ bool PruneInsideOutside(double beam_alpha,double density,const EdgeMask* preserve_mask = NULL,const bool use_sum_prod_semiring=false, const double scale=1,double promise_power=0,bool safe_inside=false);
+
+ // legacy:
+ void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,const EdgeMask* preserve_mask = NULL) {
+ PruneInsideOutside(density,0,preserve_mask,use_sum_prod_semiring,scale);
+ }
- // prunes any edge whose score on the best path taking that edge is more than alpha away
- // from the score of the global best past (or the highest edge posterior)
- void BeamPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double alpha,const EdgeMask* preserve_mask = NULL,bool safe_inside=false);
+ // legacy:
+ void BeamPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double alpha,const EdgeMask* preserve_mask = NULL) {
+ PruneInsideOutside(0,alpha,preserve_mask,use_sum_prod_semiring,scale);
+ }
// report nodes, edges, paths
std::string stats(std::string const& name="forest") const;
@@ -246,8 +261,9 @@ struct ScaledEdgeProb {
// see Li (2010), Section 3.2.2-- this is 'x_e = p_e*r_e'
struct EdgeFeaturesAndProbWeightFunction {
- typedef const SparseVector<prob_t> Weight;
- inline const SparseVector<prob_t> operator()(const Hypergraph::Edge& e) const {
+ typedef SparseVector<prob_t> Weight;
+ typedef Weight Result; //TODO: change Result->Weight everywhere?
+ inline const Weight operator()(const Hypergraph::Edge& e) const {
SparseVector<prob_t> res;
for (SparseVector<double>::const_iterator it = e.feature_values_.begin();
it != e.feature_values_.end(); ++it)