From 410cc38baef914cdc0841a2e8d5a84098e48be49 Mon Sep 17 00:00:00 2001 From: graehl Date: Sun, 25 Jul 2010 07:37:59 +0000 Subject: more comprehensible (but untested) edge/node filtering, awesome per-edge debugging streams git-svn-id: https://ws10smt.googlecode.com/svn/trunk@407 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/aligner.cc | 8 ++- decoder/cdec.cc | 5 +- decoder/ff_from_fsa.h | 33 +++++----- decoder/ff_fsa.h | 12 ++-- decoder/hg.cc | 116 ++++++++++++++++++++++++++++++++--- decoder/hg.h | 157 ++++++++++++++++++++++++++++++++++++------------ decoder/indices_after.h | 152 ++++++++++++++++++++++++++++++++++++++++++++++ decoder/kbest.h | 18 ++++++ decoder/oracle_bleu.h | 3 +- decoder/small_vector.h | 3 +- decoder/viterbi.cc | 10 +-- 11 files changed, 434 insertions(+), 83 deletions(-) create mode 100755 decoder/indices_after.h (limited to 'decoder') diff --git a/decoder/aligner.cc b/decoder/aligner.cc index d498c22c..b089f52e 100644 --- a/decoder/aligner.cc +++ b/decoder/aligner.cc @@ -231,6 +231,7 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice, const vector* edges) { bool fix_up_src_spans = false; const Hypergraph* g = &in_g; + HypergraphP new_hg; if (!src_lattice.IsSentence() || !trg_lattice.IsSentence()) { if (map_instead_of_viterbi) { @@ -240,10 +241,10 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice, fix_up_src_spans = !src_lattice.IsSentence(); } if (!map_instead_of_viterbi || edges) { - Hypergraph* new_hg = in_g.CreateViterbiHypergraph(edges); + new_hg = in_g.CreateViterbiHypergraph(edges); for (int i = 0; i < new_hg->edges_.size(); ++i) new_hg->edges_[i].edge_prob_ = prob_t::One(); - g = new_hg; + g = new_hg.get(); } vector edge_posteriors(g->edges_.size(), prob_t::Zero()); @@ -293,7 +294,8 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice, } } } - if (g != &in_g) { delete g; g = NULL; } + new_hg.reset(); + //if (g != &in_g) { g.reset(); } prob_t threshold(0.9); const bool use_soft_threshold = true; // TODO configure diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 9110a234..bc1c348d 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -102,7 +102,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c ("prelm_weights",po::value(),"Feature weights file for prelm_beam_prune. Requires --weights.") ("prelm_copy_weights","use --weights as value for --prelm_weights.") ("prelm_feature_function",po::value >()->composing(),"Additional feature functions for prelm pass only (in addition to the 0-state subset of feature_function") - ("keep_prelm_cube_order","when forest rescoring with final models, use the edge ordering from the prelm pruning features*weights. only meaningful if --prelm_weights given. UNTESTED but assume that cube pruning gives a sensible result, and that 'good' (as tuned for bleu w/ prelm features) edges come first.") + ("keep_prelm_cube_order","DEPRECATED (always enabled). when forest rescoring with final models, use the edge ordering from the prelm pruning features*weights. only meaningful if --prelm_weights given. 
UNTESTED but assume that cube pruning gives a sensible result, and that 'good' (as tuned for bleu w/ prelm features) edges come first.") ("warn_0_weight","Warn about any feature id that has a 0 weight (this is perfectly safe if you intend 0 weight, though)") ("no_freeze_feature_set,Z", "Do not freeze feature set after reading feature weights file") ("feature_function,F",po::value >()->composing(), "Additional feature function(s) (-L for list)") @@ -577,7 +577,6 @@ int main(int argc, char** argv) { if (has_prelm_models) { Timer t("prelm rescoring"); forest.Reweight(prelm_feature_weights); - forest.SortInEdgesByEdgeWeights(); Hypergraph prelm_forest; ApplyModelSet(forest, smeta, @@ -595,8 +594,6 @@ int main(int argc, char** argv) { if (has_late_models) { Timer t("Forest rescoring:"); forest.Reweight(feature_weights); - if (!has_prelm_models || conf.count("keep_prelm_cube_order")) - forest.SortInEdgesByEdgeWeights(); Hypergraph lm_forest; ApplyModelSet(forest, smeta, diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h index 20e7c5ca..f9f707d7 100755 --- a/decoder/ff_from_fsa.h +++ b/decoder/ff_from_fsa.h @@ -5,9 +5,12 @@ #define FSA_FF_DEBUG #ifdef FSA_FF_DEBUG -# define FSAFFDBG(x) do { if (debug) { std::cerr << x; } } while(0) +# define FSAFFDBG(e,x) do { if (debug) { FSADBGae(e,x) } } while(0) +# define FSAFFDBGnl(e) do { if (debug) { std::cerr< "< "<") - FSAFFDBG(" right: "< "< "< markov order 0 - FSAFFDBG("order="< @@ -249,9 +252,10 @@ public: inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,FeatureVector *features) const { Impl const& im=d(); - FSADBG("Scan "<"<"<= 0); assert(goal_node_id < nodes_.size()); + // I don't get it: does TopologicallySortNodesAndEdges not remove things that don't connect to goal_index? it uses goal_index just to order things? InsideOutside pruning can do this anyway (nearly infinite beam, viterbi semiring) // TODO finish implementation abort(); } @@ -614,15 +615,100 @@ struct EdgeWeightSorter { return hg.edges_[a].edge_prob_ > hg.edges_[b].edge_prob_; } }; - -void Hypergraph::SortInEdgesByEdgeWeights() { +/* + void Hypergraph::SortInEdgesByEdgeWeights() { for (int i = 0; i < nodes_.size(); ++i) { - Node& node = nodes_[i]; - sort(node.in_edges_.begin(), node.in_edges_.end(), EdgeWeightSorter(*this)); + Node& node = nodes_[i]; + sort(node.in_edges_.begin(), node.in_edges_.end(), EdgeWeightSorter(*this)); + } + } +*/ + +std::string Hypergraph::show_viterbi_tree(bool indent,int show_mask,int maxdepth,int depth) const { + HypergraphP v=CreateViterbiHypergraph(); + //FIXME: remove dbg print, fix. + cerr<NumberOfEdges()) { + Edge const* beste=&v->edges_.back(); //FIXME: this doesn't work. check CreateViterbiHypergraph ? 
+ return beste->derivation_tree(*this,beste,indent,show_mask,maxdepth,depth); + } + return std::string(); +} + +HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges) const { + NodeMask kn; + return CreateEdgeSubset(keep_edges,kn); +} + +HypergraphP Hypergraph::CreateNodeEdgeSubset(NodeMask const& keep_nodes,EdgeMask const&keep_edges) const { + indices_after n2(keep_nodes); + indices_after e2(keep_edges); + HypergraphP ret(new Hypergraph(n2.n_kept, e2.n_kept, is_linear_chain_)); + Nodes &rn=ret->nodes_; + for (int i=0;iedges_; + for (int i=0;i* edges) const { +HypergraphP Hypergraph::CreateViterbiHypergraph(const vector* edges) const { typedef ViterbiPathTraversal::Result VE; VE vit_edges; if (edges) { @@ -631,18 +717,29 @@ Hypergraph* Hypergraph::CreateViterbiHypergraph(const vector* edges) const } else { Viterbi(*this, &vit_edges, ViterbiPathTraversal() ,EdgeProb()); } +#if 0 +# if 1 + check_ids(); +# else + set_ids(); +# endif + EdgeMask used(edges_.size()); + for (int i = 0; i < vit_edges.size(); ++i) + used[vit_edges[i]->id_]=true; + return CreateEdgeSubset(used); +#else map old2new_node; int num_new_nodes = 0; for (int i = 0; i < vit_edges.size(); ++i) { const Edge& edge = *vit_edges[i]; - for (int j = 0; j < edge.tail_nodes_.size(); ++j) - assert(old2new_node.count(edge.tail_nodes_[j]) > 0); + for (int j = 0; j < edge.tail_nodes_.size(); ++j) assert(old2new_node.count(edge.tail_nodes_[j]) > 0); if (old2new_node.count(edge.head_node_) == 0) { old2new_node[edge.head_node_] = num_new_nodes; ++num_new_nodes; } } - Hypergraph* out = new Hypergraph(num_new_nodes, vit_edges.size(), is_linear_chain_); + HypergraphP ret(new Hypergraph(num_new_nodes, vit_edges.size(), is_linear_chain_)); + Hypergraph* out = ret.get(); for (map::iterator it = old2new_node.begin(); it != old2new_node.end(); ++it) { const Node& old_node = nodes_[it->first]; @@ -665,6 +762,7 @@ Hypergraph* Hypergraph::CreateViterbiHypergraph(const vector* edges) const out->nodes_[new_tail_node].out_edges_.push_back(i); } } - return out; +#endif + return ret; } diff --git a/decoder/hg.h b/decoder/hg.h index 34638e04..90ae0935 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -1,11 +1,13 @@ #ifndef _HG_H_ #define _HG_H_ + +//FIXME: is the edge given to ffs the coarse (previous forest) edge? if so, then INFO_EDGE is effectively not working. supposed to have logging associated with each edge and see how it fits together in kbest afterwards. 
#define USE_INFO_EDGE 1 #if USE_INFO_EDGE # include -# define INFO_EDGE(e,msg) do { std::ostringstream &o=e.info_;o<(e.info_);o<(e.info_);if (o.empty()) o<<' ';o< #include +#include #include "feature_vector.h" #include "small_vector.h" @@ -21,16 +24,20 @@ #include "tdict.h" #include "trule.h" #include "prob.h" +#include "indices_after.h" // if you define this, edges_ will be sorted // (normally, just nodes_ are - root must be nodes_.back()), but this can be quite // slow #undef HG_EDGES_TOPO_SORTED +class Hypergraph; +typedef boost::shared_ptr HypergraphP; + // class representing an acyclic hypergraph // - edges have 1 head, 0..n tails class Hypergraph { - public: +public: Hypergraph() : is_linear_chain_(false) {} // SmallVector is a fast, small vector implementation for sizes <= 2 @@ -43,9 +50,20 @@ class Hypergraph { Node() : id_(), cat_(), promise(1) {} int id_; // equal to this object's position in the nodes_ vector WordID cat_; // non-terminal category if <0, 0 if not set - std::vector in_edges_; // contents refer to positions in edges_ - std::vector out_edges_; // contents refer to positions in edges_ + EdgesVector in_edges_; // contents refer to positions in edges_ + EdgesVector out_edges_; // contents refer to positions in edges_ double promise; // set in global pruning; in [0,infty) so that mean is 1. use: e.g. scale cube poplimit + void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting + cat_=o.cat_; + promise=o.promise; + } + void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) { + copy_fixed(o); + id_=n2[id_]; + e2.copy_to(in_edges_,o.in_edges_); + e2.copy_to(out_edges_,o.out_edges_); + } + }; // TODO get rid of edge_prob_? (can be computed on the fly as the dot @@ -72,14 +90,30 @@ class Hypergraph { short int j_; short int prev_i_; short int prev_j_; + + void copy_fixed(Edge const& o) { + rule_=o.rule_; + feature_values_ = o.feature_values_; + edge_prob_ = o.edge_prob_; + i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; +#if USE_INFO_EDGE + info_.str(o.info_.str()); +#endif + } + void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) { + copy_fixed(o); + head_node_=n2[o.head_node_]; + id_=e2[o.id_]; + n2.copy_to(tail_nodes_,o.tail_nodes_); + } + #if USE_INFO_EDGE - private: - std::ostringstream info_; - public: + mutable std::ostringstream info_; + Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str()) { } void operator=(Edge const& o) { head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; info_.str(o.info_.str()); - } + } std::string info() const { return info_.str(); } #else std::string info() const { return std::string(); } @@ -109,9 +143,47 @@ class Hypergraph { show(o,mask); return o.str(); } + /* generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle) + + FIXME: make kbest create a simple derivation-tree structure (could be a + hg), and replace the list-of-edges viterbi.h with a tree-structured one. + CreateViterbiHypergraph can do for 1best, though. 
+ */ + template + std::string derivation_tree(EdgeRecurse const& re,EdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const { + std::ostringstream o; + derivation_tree_stream(re,eh,o,indent,show_mask,maxdepth,depth); + return o.str(); + } + template + void derivation_tree_stream(EdgeRecurse const& re,EdgeHandle const& eh,std::ostream &o,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const { + if (depth>maxdepth) return; + if (indent) for (int i=0;iderivation_tree_stream(re,c,o,indent,show_mask,maxdepth,depth+1); + if (!indent) o<<' '; + } + } + if (indent) for (int i=0;imaxdepth) return; - if (indent) for(int i=0;i EdgeProbs; typedef std::vector EdgeMask; + typedef std::vector NodeMask; // computes inside and outside scores for each // edge in the hypergraph @@ -231,10 +282,33 @@ class Hypergraph { // find the score of the very best path passing through each edge prob_t ComputeBestPathThroughEdges(EdgeProbs* posts) const; + + /* for all of the below subsets, the hg Nodes must be topo sorted already*/ + + // keep_nodes is an output-only param, keep_edges is in-out (more may be pruned than you asked for if the tails can't be built) + HypergraphP CreateEdgeSubset(EdgeMask & keep_edges_in_out,NodeMask &keep_nodes_out) const; + HypergraphP CreateEdgeSubset(EdgeMask & keep_edges) const; + // node keeping is final (const), edge keeping is an additional constraint which will sometimes be made stricter (and output back to you) by the absence of nodes. you may end up with some empty nodes. that is, kept edges will be made consistent with kept nodes first (but empty nodes are allowed) + HypergraphP CreateNodeSubset(NodeMask const& keep_nodes,EdgeMask &keep_edges_in_out) const { + TightenEdgeMask(keep_edges_in_out,keep_nodes); + return CreateNodeEdgeSubset(keep_nodes,keep_edges_in_out); + } + HypergraphP CreateNodeSubset(NodeMask const& keep_nodes) const { + EdgeMask ke(edges_.size(),true); + return CreateNodeSubset(keep_nodes,ke); + } + + void TightenEdgeMask(EdgeMask &ke,NodeMask const& kn) const; + // kept edges are consistent with kept nodes already: + HypergraphP CreateNodeEdgeSubset(NodeMask const& keep_nodes,EdgeMask const&keep_edges_in_out) const; + HypergraphP CreateNodeEdgeSubset(NodeMask const& keep_nodes) const; + + // create a new hypergraph consisting only of the nodes / edges // in the Viterbi derivation of this hypergraph // if edges is set, use the EdgeSelectEdgeWeightFunction - Hypergraph* CreateViterbiHypergraph(const EdgeMask* edges = NULL) const; + // NOTE: last edge/node index are goal + HypergraphP CreateViterbiHypergraph(const EdgeMask* edges = NULL) const; // move weights as near to the source as possible, resulting in a // stochastic automaton. ONLY FUNCTIONAL FOR *LATTICES*. 
@@ -246,7 +320,7 @@ class Hypergraph { // not just lattices void PushWeightsToGoal(double scale = 1.0); - void SortInEdgesByEdgeWeights(); +// void SortInEdgesByEdgeWeights(); // local sort only - pretty useless void PruneUnreachable(int goal_node_id); // DEPRECATED @@ -301,21 +375,28 @@ class Hypergraph { bool is_linear_chain_; // nodes_ is sorted in topological order - std::vector nodes_; + typedef std::vector Nodes; + Nodes nodes_; // edges_ is not guaranteed to be in any particular order - std::vector edges_; + typedef std::vector Edges; + Edges edges_; // reorder nodes_ so they are in topological order // source nodes at 0 sink nodes at size-1 void TopologicallySortNodesAndEdges(int goal_idx, const EdgeMask* prune_edges = NULL); - private: + + void set_ids(); // resync edge,node .id_ + void check_ids(); // assert that .id_ have been kept in sync + +private: Hypergraph(int num_nodes, int num_edges, bool is_lc) : is_linear_chain_(is_lc), nodes_(num_nodes), edges_(num_edges) {} static TRulePtr kEPSRule; static TRulePtr kUnaryRule; }; + // common WeightFunctions, map an edge -> WeightType // for generic Viterbi/Inside algorithms struct EdgeProb { @@ -331,7 +412,7 @@ struct EdgeSelectEdgeWeightFunction { if (v_[e.id_]) return prob_t::One(); else return prob_t::Zero(); } - private: +private: const EdgeMask& v_; }; diff --git a/decoder/indices_after.h b/decoder/indices_after.h new file mode 100755 index 00000000..ad94c576 --- /dev/null +++ b/decoder/indices_after.h @@ -0,0 +1,152 @@ +#ifndef INDICES_AFTER_REMOVING_H +#define INDICES_AFTER_REMOVING_H + +#include // STATIC_CONSTANT +#include //swap + +// iterator wrapper. inverts boolean value. +template +struct keep_not_marked +{ + typedef keep_not_marked self_type; + AB i; + bool operator *() const + { + return !*i; + } + void operator++() + { + ++i; + } + bool operator ==(self_type const& o) + { + return i==o.i; + } +}; + +// outputs sequence to out iterator, of new indices for each element i, corresponding to deleting element i from an array when remove[i] is true (-1 if it was deleted, new index otherwise), returning one-past-end of out (the return value = # of elements left after deletion) +template +unsigned new_indices_keep(KEEP i, KEEPe end,O out) { + unsigned f=0; + while (i!=end) + *out++ = *i++ ? f++ : (unsigned)-1; + return f; +}; + +template +unsigned new_indices_keep_if_n(unsigned n,It i,KeepIf const& r,O out) +{ + unsigned f=0; + while(n--) + *out++ = r(*i++) ? 
f++ : (unsigned)-1; + return f; +} + +template +unsigned new_indices(KEEP keep,O out) { + return new_indices(keep.begin(),keep.end(),out); +} + +// given a vector and a parallel sequence of bools where true means keep, keep only the marked elements while maintaining order +struct indices_after +{ + BOOST_STATIC_CONSTANT(unsigned,REMOVED=(unsigned)-1); + unsigned *map; // map[i] == REMOVED if i is deleted + unsigned n_kept; + unsigned n_mapped; + template + indices_after(AB i, ABe end) { + init(i,end); + } + + template + void init_keep_if(Vec v,R const& r) + { + n_mapped=v.size(); + if ( !n_mapped ) return; + map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped); + n_kept=new_indices_keep_if_n(n_mapped,r,map); + } + + template + void init(AB i, ABe end) { + n_mapped=end-i; + if (n_mapped>0) { + map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped); + n_kept=new_indices_keep(i,end,map); + } else + map=NULL; + } + template + void init(A const& a) + { + init(a.begin(),a.end()); + } + + template + indices_after(A const& a) + { + init(a.begin(),a.end()); + } + indices_after() : n_mapped(0) {} + ~indices_after() + { + if (n_mapped) + ::operator delete((void*)map); + } + bool removing(unsigned i) const + { + return map[i] == REMOVED; + } + bool keeping(unsigned i) const + { + return map[i] != REMOVED; + } + + unsigned operator[](unsigned i) const + { + return map[i]; + } + + template + void do_moves(Vec &v) const + { + assert(v.size()==n_mapped); + unsigned i=0; + for (;i + void do_moves_swap(Vec &v) const + { + using std::swap; + assert(v.size()==n_mapped); + unsigned r=n_mapped; + unsigned i=0; + for (;i + void copy_to(Vecto &to,Vecfrom const& v) const { + to.resize(n_kept); + for (unsigned i=0;iscore > b->score; } }; + + struct EdgeHandle { + Derivation const* d; + explicit EdgeHandle(Derivation const* d) : d(d) { } +// operator bool() const { return d->edge; } + operator Hypergraph::Edge const* () const { return d->edge; } +// const Hypergraph::Edge* operator ->() const { return d->edge; } + }; + +// typedef Derivation EdgeHandle; // will cause copying, below but not performance critical; change to actual handle if you want + EdgeHandle operator()(int t,int taili,EdgeHandle const& parent) const { + return EdgeHandle(nds[t].D[parent.d->j[taili]]); + } + + std::string derivation_tree(Derivation const& d,bool indent=true,int show_mask=Hypergraph::SPAN|Hypergraph::RULE,int maxdepth=0x7FFFFFFF,int depth=0) const { + return d.edge->derivation_tree(*this,EdgeHandle(&d),indent,show_mask,maxdepth,depth); + } + struct DerivationUniquenessHash { size_t operator()(const Derivation* d) const { size_t x = 5381; diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index 4a2cbbe5..fbb681e0 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -246,7 +246,6 @@ struct OracleBleu { void ReweightBleu(Hypergraph *dest_forest,double bleu_weight=-1.) { feature_weights_.set_value(0,bleu_weight); dest_forest->Reweight(feature_weights_); -// dest_forest->SortInEdgesByEdgeWeights(); } bool show_derivation; @@ -272,7 +271,7 @@ struct OracleBleu { kbest_out<edge); + deriv_out< trans; const prob_t vs = ViterbiESentence(hg, &trans); - o<(hg, &d); - if (d.empty()) - o<<"(empty viterbi hyperpath - no translation)"; - else - hg.show_tree(o,*d.back(),false); // last item should be goal (or at least depend on prev items). TODO: this doesn't actually reorder the nodes in hg. + o << hg.show_viterbi_tree(false); // last item should be goal (or at least depend on prev items). 
TODO: this doesn't actually reorder the nodes in hg. o<
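
To make the new reindexing machinery in this patch easier to follow: the indices_after helper turns a keep-mask into an old-index to new-index map, with removed entries mapped to (unsigned)-1, and CreateNodeEdgeSubset uses two such maps (one for nodes, one for edges) to rewrite the id lists of the surviving nodes and edges. The standalone C++ sketch below only illustrates that idea under simplified assumptions; NPOS, reindex, and main are names chosen for this example, new_indices_keep is modeled on (but not identical to) the function of the same name in indices_after.h, and none of it builds against the decoder itself.

    // Illustration of the indices_after idea: compute new positions for kept
    // elements, then use the map to rewrite a parallel list of indices.
    #include <cassert>
    #include <iostream>
    #include <vector>

    static const unsigned NPOS = (unsigned)-1;  // same sentinel role as indices_after::REMOVED

    // Analog of new_indices_keep: fills map[i] with the new index of element i
    // (or NPOS if keep[i] is false) and returns the number of kept elements.
    unsigned new_indices_keep(const std::vector<bool>& keep, std::vector<unsigned>* map) {
      map->resize(keep.size());
      unsigned kept = 0;
      for (unsigned i = 0; i < keep.size(); ++i)
        (*map)[i] = keep[i] ? kept++ : NPOS;
      return kept;
    }

    // Rewrites a list of node/edge ids through the map, loosely mirroring what
    // copy_reindex does for tail_nodes_ / in_edges_. Every referenced id must
    // itself be kept (TightenEdgeMask enforces the analogous condition for edges).
    std::vector<unsigned> reindex(const std::vector<unsigned>& ids,
                                  const std::vector<unsigned>& map) {
      std::vector<unsigned> out;
      out.reserve(ids.size());
      for (unsigned id : ids) {
        assert(map[id] != NPOS);
        out.push_back(map[id]);
      }
      return out;
    }

    int main() {
      std::vector<bool> keep = {true, false, true, true, false};
      std::vector<unsigned> map;
      unsigned kept = new_indices_keep(keep, &map);   // kept == 3, map == {0, NPOS, 1, 2, NPOS}
      std::vector<unsigned> tails = {0, 2, 3};        // e.g. tail node ids that survived pruning
      std::vector<unsigned> new_tails = reindex(tails, map);   // {0, 1, 2}
      std::cout << kept << ' ' << new_tails[2] << '\n';        // prints "3 2"
      return 0;
    }

A flat array of unsigned with a -1 sentinel keeps each lookup O(1) and avoids per-node allocation, which is presumably why indices_after.h uses a raw unsigned block rather than the std::map based old-to-new bookkeeping that the previous CreateViterbiHypergraph code used.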