From 11980f43455a85f31f2941f570f9a3a1ff925408 Mon Sep 17 00:00:00 2001 From: graehl Date: Mon, 12 Jul 2010 03:42:39 +0000 Subject: inebriated, but sped up inside/outside pruning and made cube poplimit scale with promise of nodes from previous global pruning, if --promise_power=N for N>0. git-svn-id: https://ws10smt.googlecode.com/svn/trunk@219 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/aligner.cc | 5 +- decoder/apply_models.cc | 3 +- decoder/cdec.cc | 8 ++- decoder/hg.cc | 134 ++++++++++++++++++++++++++++++----------------- decoder/hg.h | 40 +++++++++----- decoder/inside_outside.h | 109 ++++++++++++++++++++++++++++++-------- 6 files changed, 207 insertions(+), 92 deletions(-) (limited to 'decoder') diff --git a/decoder/aligner.cc b/decoder/aligner.cc index bad97b74..d498c22c 100644 --- a/decoder/aligner.cc +++ b/decoder/aligner.cc @@ -86,7 +86,7 @@ void SourceEdgeCoveragesUsingParseIndices(const Hypergraph& g, vector >* src_cov) { src_cov->clear(); src_cov->resize(g.edges_.size()); - + for (int i = 0; i < g.edges_.size(); ++i) { const Hypergraph::Edge& edge = g.edges_[i]; set& cov = (*src_cov)[i]; @@ -212,6 +212,7 @@ void TargetEdgeCoveragesUsingTree(const Hypergraph& g, } struct TransitionEventWeightFunction { + typedef SparseVector Result; inline SparseVector operator()(const Hypergraph::Edge& e) const { SparseVector result; result.set_value(e.id_, e.edge_prob_); @@ -261,7 +262,7 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice, if (edges || !map_instead_of_viterbi) { for (int i = 0; i < edge_posteriors.size(); ++i) edge_posteriors[i] = prob_t::One(); - } else { + } else { SparseVector posts; const prob_t z = InsideOutside, TransitionEventWeightFunction>(*g, &posts); for (int i = 0; i < edge_posteriors.size(); ++i) diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 78f37e68..4093f667 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -254,7 +254,8 @@ public: make_heap(cand.begin(), cand.end(), HeapCandCompare()); State2Node state2node; // "buf" in Figure 2 int pops = 0; - while(!cand.empty() && pops < pop_limit_) { + int pop_limit_eff=max(1,int(v.promise*pop_limit_)); + while(!cand.empty() && pops < pop_limit_eff) { pop_heap(cand.begin(), cand.end(), HeapCandCompare()); Candidate* item = cand.back(); cand.pop_back(); diff --git a/decoder/cdec.cc b/decoder/cdec.cc index bcba1409..09760f6b 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -127,6 +127,7 @@ void InitCommandLine(int argc, char** argv, po::variables_map* confp) { ("prelm_beam_prune", po::value(), "Prune paths from -LM forest before LM rescoring, keeping paths within exp(alpha>=0)") ("beam_prune", po::value(), "Prune paths from +LM forest, keep paths within exp(alpha>=0)") ("scale_prune_srclen", "scale beams by the input length (in # of tokens; may not be what you want for lattices") + ("promise_power",po::value()->default_value(0), "Give more beam budget to more promising previous-pass nodes when pruning - but allocate the same average beams. 0 means off, 1 means beam proportional to inside*outside prob, n means nth power (affects just --cubepruning_pop_limit)") ("lexalign_use_null", "Support source-side null words in lexical translation") ("tagger_tagset,t", po::value(), "(Tagger) file containing tag set") ("csplit_output_plf", "(Compound splitter) Output lattice in PLF format") @@ -337,7 +338,7 @@ void forest_stats(Hypergraph &forest,string name,bool show_tree,bool show_featur } void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,string ndensity,string forestname,double srclen) { - double beam_prune,density_prune; + double beam_prune=0,density_prune=0; bool use_beam_prune=beam_param(conf,nbeam,&beam_prune,conf.count("scale_prune_srclen"),srclen); bool use_density_prune=beam_param(conf,ndensity,&density_prune); if (use_beam_prune || use_density_prune) { @@ -348,10 +349,7 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s preserve_mask[CompoundSplit::GetFullWordEdgeIndex(forest)] = true; pm=&preserve_mask; } - if (use_beam_prune) - forest.BeamPruneInsideOutside(1.0, false, beam_prune, pm); - if (use_density_prune) - forest.DensityPruneInsideOutside(1.0 ,false, density_prune, pm); + forest.PruneInsideOutside(beam_prune,density_prune,pm,false,1,conf["promise_power"].as()); if (!forestname.empty()) forestname=" "+forestname; forest_stats(forest," Pruned "+forestname+" forest",false,false); cerr << " Pruned "< Result; ScaledTransitionEventWeightFunction(double alpha) : scale_(alpha) {} inline SparseVector operator()(const Hypergraph::Edge& e) const { SparseVector result; @@ -50,6 +51,7 @@ struct ScaledTransitionEventWeightFunction { const double scale_; }; +// safe to reinterpret a vector of these as a vector of prob_t (plain old data) struct TropicalValue { TropicalValue() : v_() {} explicit TropicalValue(int v) { @@ -71,12 +73,14 @@ struct TropicalValue { }; struct ViterbiWeightFunction { + typedef TropicalValue Weight; inline TropicalValue operator()(const Hypergraph::Edge& e) const { return TropicalValue(e.edge_prob_); } }; struct ViterbiTransitionEventWeightFunction { + typedef SparseVector Result; inline SparseVector operator()(const Hypergraph::Edge& e) const { SparseVector result; result.set_value(e.id_, TropicalValue(e.edge_prob_)); @@ -100,6 +104,7 @@ prob_t Hypergraph::ComputeEdgePosteriors(double scale, vector* posts) co } prob_t Hypergraph::ComputeBestPathThroughEdges(vector* post) const { + // I don't like this - explicitly passing around counts of each edge. It's clever but slow. SparseVector pv; const TropicalValue viterbi_weight = InsideOutside causes problems with the Inside algorithm. - // I don't know a good c++ way to resolve this short of template specialization which - // I dislike. If you know of a better way that doesn't involve specialization, - // fix this! vector reachable; bool goal_derivable = Inside/* */(*this, &reachable, wf); if (!goal_derivable) { @@ -182,12 +183,32 @@ void Hypergraph::PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorith filtered[i] = prune; } } - TopologicallySortNodesAndEdges(nodes_.size() - 1, &filtered); } +void Hypergraph::SetPromise(NodeProbs const& inside,NodeProbs const& outside,double power, bool normalize) +{ + int nn=nodes_.size(); + if (!nn) return; + assert(inside.size()==nn); + assert(outside.size()==nn); + double sum; //TODO: prevent underflow by using prob_t? + if (normalize) + for (int i=0;i const& io,prob_t cutoff,vector const* preserve_mask,bool safe_inside,bool verbose) { + assert(io.size()==edges_.size()); const prob_t BARELY_SMALLER(1e-6,false); // nearly 1; 1-epsilon //TODO: //FIXME: if EPSILON is 0, then remnants (useless edges that don't connect to top? or top-connected but not bottom-up buildable referneced?) are left in the hypergraph output that cause mr_vest_map to segfault. adding EPSILON probably just covers up the symptom by making it far less frequent; I imagine any time threshold is set by DensityPrune, cutoff is exactly equal to the io of several nodes, but because of how it's computed, some round slightly down vs. slightly up. probably the flaw is in PruneEdges. int ne=NumberOfEdges(); @@ -213,54 +234,67 @@ void Hypergraph::MarginPrune(vector const& io,prob_t cutoff,vector PruneEdges(prune,safe_inside); // inside reachability check in case cutoff rounded down too much (probably redundant with EPSILON hack) } -void Hypergraph::DensityPruneInsideOutside(const double scale, - const bool use_sum_prod_semiring, - const double density, - const vector* preserve_mask - , bool safe_inside - ) -{ - assert(density >= 1.0); - const int plen = ViterbiPathLength(*this); - vector bp; - int rnum = min(static_cast(edges_.size()), static_cast(density * static_cast(plen))); - cerr << "Density pruning: keep "< io(edges_.size()); - if (use_sum_prod_semiring) { - ComputeEdgePosteriors(scale, &io); - assert(scale > 0.0); - } else - ComputeBestPathThroughEdges(&io); - assert(edges_.size() == io.size()); - vector sorted = io; - nth_element(sorted.begin(), sorted.begin() + rnum, sorted.end(), greater()); - MarginPrune(io,sorted[rnum],preserve_mask,safe_inside); +template +V nth_greatest(int n,vector vs) { + nth_element(vs.begin(),vs.begin()+n,vs.end(),greater()); + return vs[n]; } -void Hypergraph::BeamPruneInsideOutside( - const double scale, - const bool use_sum_prod_semiring, - const double alpha, - const vector* preserve_mask - ,bool safe_inside) { - assert(alpha >= 0.0); - vector io(edges_.size()); - if (use_sum_prod_semiring) { - ComputeEdgePosteriors(scale, &io); - assert(scale > 0.0); - } else - ComputeBestPathThroughEdges(&io); - assert(edges_.size() == io.size()); - prob_t best; // initializes to zero - for (int i = 0; i < io.size(); ++i) - if (io[i] > best) best = io[i]; - MarginPrune(io,best*prob_t::exp(-alpha),preserve_mask,safe_inside); +bool Hypergraph::PruneInsideOutside(double alpha,double density,const EdgeMask* preserve_mask,const bool use_sum_prod_semiring, const double scale,double promise_power,bool safe_inside) +{ + bool use_density=density!=0; + bool use_beam=alpha!=0; + assert(!use_beam||alpha>0); + assert(!use_density||density>=1); + assert(!use_sum_prod_semiring||scale>0); + int rnum; + if (use_density) { + const int plen = ViterbiPathLength(*this); + vector bp; + rnum = min(static_cast(edges_.size()), static_cast(density * static_cast(plen))); + cerr << "Density pruning: keep "< io; + OutsideNormalize norm; + if (use_sum_prod_semiring) + io.compute(*this,norm,ScaledEdgeProb(scale)); + else + io.compute(*this,norm,ViterbiWeightFunction()); // the storage gets cast to Tropical from prob_t, scary - e.g. w/ specialized static allocator differences it could break. + vector mm; + io.compute_edge_marginals(*this,mm,EdgeProb()); // should be normalized to 1 for best edges in viterbi. in sum, best is less than 1. + + prob_t cutoff=prob_t::One(); // we'll destroy everything smaller than this (note: nothing is bigger than 1). so bigger cutoff = more pruning. + bool density_won=false; + if (use_density) { + cutoff=nth_greatest(rnum,mm); + density_won=true; + } + if (use_beam) { + prob_t best=prob_t::One(); + if (use_sum_prod_semiring) { + for (int i = 0; i < mm.size(); ++i) + if (mm[i] > best) best = mm[i]; + } + prob_t beam_cut=best*prob_t::exp(-alpha); + if (!(use_density&&cutoff>beam_cut)) { + density_won=false; + cutoff=beam_cut; + } + } + if (promise_power!=0) + SetPromise(io.inside,io.outside,promise_power,true); + MarginPrune(mm,cutoff,preserve_mask,safe_inside); // we do this last because otherwise indices in mm would be wrong for setting promise. + return density_won; } + void Hypergraph::PrintGraphviz() const { int ei = 0; cerr << "digraph G {\n rankdir=LR;\n nodesep=.05;\n"; @@ -378,6 +412,8 @@ struct IdCompare { bool operator()(const T& a, const T& b) { return a.id_ < b.id_; } }; +// this keeps the nodes' edge indices and edges' node indices in sync. or do nodes not get removed when you prune_edges? seems like they get reordered. +//TODO: if you had parallel arrays associating data w/ each node or edge, you'd want access to reloc_node and reloc_edge - expose in stateful object? void Hypergraph::TopologicallySortNodesAndEdges(int goal_index, const vector* prune_edges) { // figure out which nodes are reachable from the goal diff --git a/decoder/hg.h b/decoder/hg.h index 0f8d21fd..4b6a6357 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -11,7 +11,7 @@ #include "prob.h" // if you define this, edges_ will be sorted -// (normally, just nodes_ are), but this can be quite +// (normally, just nodes_ are - root must be nodes_.back()), but this can be quite // slow #undef HG_EDGES_TOPO_SORTED @@ -25,12 +25,14 @@ class Hypergraph { typedef SmallVector TailNodeVector; // TODO get rid of cat_? + // TODO keep cat_ and add span and/or state? :) struct Node { - Node() : id_(), cat_() {} + Node() : id_(), cat_(), promise(1) {} int id_; // equal to this object's position in the nodes_ vector WordID cat_; // non-terminal category if <0, 0 if not set std::vector in_edges_; // contents refer to positions in edges_ std::vector out_edges_; // contents refer to positions in edges_ + double promise; // set in global pruning; in [0,infty) so that mean is 1. use: e.g. scale cube poplimit }; // TODO get rid of edge_prob_? (can be computed on the fly as the dot @@ -173,17 +175,30 @@ class Hypergraph { // note: if run_inside_algorithm is false, then consumers may be unhappy if you pruned nodes that are built on by nodes that are kept. void PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorithm = false); - // for density>=1.0, keep this many times the edges needed for the 1best derivation - // if you don't know, use_sum_prod_semiring should be false - void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,const EdgeMask* preserve_mask = NULL,bool safe_inside=false); - /// drop edge i if edge_margin[i] < prune_below, unless preserve_mask[i] void MarginPrune(EdgeProbs const& edge_margin,prob_t prune_below,EdgeMask const* preserve_mask=0,bool safe_inside=false,bool verbose=false); - // safe_inside: if true, a theoretically redundant (but practically important .001% of the time due to rounding error) inside pruning pass will happen after max-marginal pruning. if you don't do this, it's possible that the pruned hypergraph will contain outside-reachable (but not inside-buildable) nodes. that is, a parent will be kept whose children were pruned. if output, those forests may confuse (crash) e.g. mr_vest_map. however, if the hyperedges occur in defined-before-use (all edges with head h occur before h is used as a tail) order, then a grace margin for keeping edges that starts leniently and becomes more forbidding will make it impossible for this to occur, i.e. safe_inside=true is not needed. + // promise[i]=((max_marginal[i]/viterbi)^power).todouble. if normalize, ensure that avg promise is 1. + typedef EdgeProbs NodeProbs; + void SetPromise(NodeProbs const& inside,NodeProbs const& outside, double power=1, bool normalize=true); + + // beam_alpha=0 means don't beam prune, otherwise drop things that are e^beam_alpha times worse than best - // prunes any edge whose prob_t on the best path taking that edge is more than e^alpha times + //density=0 means don't density prune: // for density>=1.0, keep this many times the edges needed for the 1best derivation + // worse than the score of the global best past (or the highest edge posterior) + // scale is for use_sum_prod_semiring (sharpens distribution?) + // promise_power is for a call to SetPromise (no call happens if power=0) + // returns true if density pruning was tighter than beam + // safe_inside would be a redundant anti-rounding error second bottom-up reachability before actually removing edges, to prevent stranded edges. shouldn't be needed - if the hyperedges occur in defined-before-use (all edges with head h occur before h is used as a tail) order, then a grace margin for keeping edges that starts leniently and becomes more forbidding will make it impossible for this to occur, i.e. safe_inside=true is not needed. + bool PruneInsideOutside(double beam_alpha,double density,const EdgeMask* preserve_mask = NULL,const bool use_sum_prod_semiring=false, const double scale=1,double promise_power=0,bool safe_inside=false); + + // legacy: + void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,const EdgeMask* preserve_mask = NULL) { + PruneInsideOutside(density,0,preserve_mask,use_sum_prod_semiring,scale); + } - // prunes any edge whose score on the best path taking that edge is more than alpha away - // from the score of the global best past (or the highest edge posterior) - void BeamPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double alpha,const EdgeMask* preserve_mask = NULL,bool safe_inside=false); + // legacy: + void BeamPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double alpha,const EdgeMask* preserve_mask = NULL) { + PruneInsideOutside(0,alpha,preserve_mask,use_sum_prod_semiring,scale); + } // report nodes, edges, paths std::string stats(std::string const& name="forest") const; @@ -246,8 +261,9 @@ struct ScaledEdgeProb { // see Li (2010), Section 3.2.2-- this is 'x_e = p_e*r_e' struct EdgeFeaturesAndProbWeightFunction { - typedef const SparseVector Weight; - inline const SparseVector operator()(const Hypergraph::Edge& e) const { + typedef SparseVector Weight; + typedef Weight Result; //TODO: change Result->Weight everywhere? + inline const Weight operator()(const Hypergraph::Edge& e) const { SparseVector res; for (SparseVector::const_iterator it = e.feature_values_.begin(); it != e.feature_values_.end(); ++it) diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index 62daca1f..128d89da 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -34,8 +34,9 @@ WeightType Inside(const Hypergraph& hg, const int num_nodes = hg.nodes_.size(); std::vector dummy; std::vector& inside_score = result ? *result : dummy; + inside_score.clear(); inside_score.resize(num_nodes); - std::fill(inside_score.begin(), inside_score.end(), WeightType()); +// std::fill(inside_score.begin(), inside_score.end(), WeightType()); // clear handles for (int i = 0; i < num_nodes; ++i) { const Hypergraph::Node& cur_node = hg.nodes_[i]; WeightType* const cur_node_inside_score = &inside_score[i]; @@ -61,14 +62,17 @@ template void Outside(const Hypergraph& hg, std::vector& inside_score, std::vector* result, - const WeightFunction& weight = WeightFunction()) { + const WeightFunction& weight = WeightFunction(), + WeightType scale_outside = WeightType(1) + ) { assert(result); const int num_nodes = hg.nodes_.size(); assert(inside_score.size() == num_nodes); std::vector& outside_score = *result; + outside_score.clear(); outside_score.resize(num_nodes); - std::fill(outside_score.begin(), outside_score.end(), WeightType()); - outside_score.back() = WeightType(1); +// std::fill(outside_score.begin(), outside_score.end(), WeightType()); // cleared + outside_score.back() = scale_outside; for (int i = num_nodes - 1; i >= 0; --i) { const Hypergraph::Node& cur_node = hg.nodes_[i]; const WeightType& head_node_outside_score = outside_score[i]; @@ -94,6 +98,80 @@ void Outside(const Hypergraph& hg, } } +template // obviously not all semirings have a multiplicative inverse +struct OutsideNormalize { + bool enable; + OutsideNormalize(bool enable=true) : enable(enable) {} + K operator()(K k) { return enable?K(1)/k:K(1); } +}; +template +struct Outside1 { + K operator()(K) { return K(1); } +}; + +template +struct InsideOutsides { +// typedef typename KWeightFunction::Weight KType; + typedef std::vector Ks; + Ks inside,outside; + KType root_inside() { + return inside.back(); + } + InsideOutsides() { } + template + KType compute(Hypergraph const& hg,KWeightFunction const& kwf=KWeightFunction()) { + return compute(hg,Outside1(),kwf); + } + template + KType compute(Hypergraph const& hg,O1 outside1,KWeightFunction const& kwf=KWeightFunction()) { + typedef typename KWeightFunction::Weight KType2; + assert(sizeof(KType2)==sizeof(KType)); // why am I doing this? because I want to share the vectors used for tropical and prob_t semirings. should instead have separate value type from semiring operations? or suck it up and split the code calling in Prune* into 2 types (template) + typedef std::vector K2s; + K2s &inside2=reinterpret_cast(inside); + Inside(hg, &inside2, kwf); + KType scale=outside1(reinterpret_cast(inside2.back())); + Outside(hg, inside2, reinterpret_cast(&outside), kwf, reinterpret_cast(scale)); + return root_inside(); + } +// XWeightFunction::Result is result + template + typename XWeightFunction::Result expect(Hypergraph const& hg,XWeightFunction const& xwf=XWeightFunction()) { + typename XWeightFunction::Result x; // default constructor is semiring 0 + for (int i = 0,num_nodes=hg.nodes_.size(); i < num_nodes; ++i) { + const Hypergraph::Node& cur_node = hg.nodes_[i]; + const int num_in_edges = cur_node.in_edges_.size(); + for (int j = 0; j < num_in_edges; ++j) { + const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; + KType kbar_e = outside[i]; + const int num_tail_nodes = edge.tail_nodes_.size(); + for (int k = 0; k < num_tail_nodes; ++k) + kbar_e *= inside[edge.tail_nodes_[k]]; + x += xwf(edge) * kbar_e; + } + } + return x; + } + template + void compute_edge_marginals(Hypergraph const& hg,std::vector &vs,VWeight const& weight) { + vs.resize(hg.edges_.size()); + for (int i = 0,num_nodes=hg.nodes_.size(); i < num_nodes; ++i) { + const Hypergraph::Node& cur_node = hg.nodes_[i]; + const int num_in_edges = cur_node.in_edges_.size(); + for (int j = 0; j < num_in_edges; ++j) { + int edgei=cur_node.in_edges_[j]; + const Hypergraph::Edge& edge = hg.edges_[edgei]; + V x=weight(edge)*outside[i]; + const int num_tail_nodes = edge.tail_nodes_.size(); + for (int k = 0; k < num_tail_nodes; ++k) + x *= inside[edge.tail_nodes_[k]]; + vs[edgei] = x; + } + } + } + +}; + + // this is the Inside-Outside optimization described in Li and Eisner (EMNLP 2009) // for computing the inside algorithm over expensive semirings // (such as expectations over features). See Figure 4. @@ -105,25 +183,10 @@ KType InsideOutside(const Hypergraph& hg, XType* result_x, const KWeightFunction& kwf = KWeightFunction(), const XWeightFunction& xwf = XWeightFunction()) { - const int num_nodes = hg.nodes_.size(); - std::vector inside, outside; - const KType k = Inside(hg, &inside, kwf); - Outside(hg, inside, &outside, kwf); - XType& x = *result_x; - x = XType(); // default constructor is semiring 0 - for (int i = 0; i < num_nodes; ++i) { - const Hypergraph::Node& cur_node = hg.nodes_[i]; - const int num_in_edges = cur_node.in_edges_.size(); - for (int j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; - KType kbar_e = outside[i]; - const int num_tail_nodes = edge.tail_nodes_.size(); - for (int k = 0; k < num_tail_nodes; ++k) - kbar_e *= inside[edge.tail_nodes_[k]]; - x += xwf(edge) * kbar_e; - } - } - return k; + InsideOutsides io; + io.compute(hg,kwf); + *result_x=io.expect(hg,xwf); + return io.root_inside(); } #endif -- cgit v1.2.3