From b96f8ca79c3293062a4b24266e07859cedc936c2 Mon Sep 17 00:00:00 2001 From: "graehl@gmail.com" Date: Mon, 9 Aug 2010 06:14:04 +0000 Subject: hg sort in_edges by viterbi, debug print temporarily enabled which always --show_derivation git-svn-id: https://ws10smt.googlecode.com/svn/trunk@494 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/hg.cc | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++---- decoder/hg.h | 45 +++++++++++++++++++++++++---------- decoder/viterbi.cc | 9 ++++++- 3 files changed, 106 insertions(+), 18 deletions(-) diff --git a/decoder/hg.cc b/decoder/hg.cc index 47924d99..1e2a0010 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -16,6 +16,7 @@ using namespace std; +#if 0 Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const { Edge const* r=0; @@ -26,6 +27,64 @@ Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const } return r; } +#endif + +Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges() +{ + NodeProbs nv; + ComputeNodeViterbi(&nv); + return SortInEdgesByNodeViterbi(nv); +} + +Hypergraph::Edge const* Hypergraph::SortInEdgesByNodeViterbi(NodeProbs const& nv) +{ + EdgeProbs ev; + ComputeEdgeViterbi(nv,&ev); + return ViterbiSortInEdges(ev); +} + +namespace { +struct less_ve { + Hypergraph::EdgeProbs const& ev; + //Hypergraph const& hg; + explicit less_ve(Hypergraph::EdgeProbs const& ev/*,Hypergraph const& hg */) : ev(ev)/*,hg(hg)*/ { } + bool operator()(int a,int b) const { + return ev[a] > ev[b]; + } +}; +} + +Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges(EdgeProbs const& ev) +{ + for (int i=0;iderivation_tree(*this,fe,indent,show_mask,maxdepth,depth) : ""; +} + std::string Hypergraph::show_viterbi_tree(bool indent,int show_mask,int maxdepth,int depth) const { HypergraphP v=CreateViterbiHypergraph(); // cerr<NumberOfEdges()) { - Edge const* beste=&v->edges_.back(); //FIXME: this doesn't work. check CreateViterbiHypergraph ? - return beste->derivation_tree(*v,beste,indent,show_mask,maxdepth,depth); - } - return std::string(); + return v->show_first_tree(indent,show_mask,maxdepth,depth); } HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges) const { diff --git a/decoder/hg.h b/decoder/hg.h index f426d32f..e64db837 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -197,24 +197,52 @@ public: } }; + typedef std::vector EdgeProbs; + typedef std::vector NodeProbs; + typedef std::vector EdgeMask; + typedef std::vector NodeMask; + std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; +// builds viterbi hg and returns it formatted as a pretty string + + enum { + NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF + }; + + std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; + // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first typedef Edge const* EdgeHandle; EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const { - return viterbi_edge(tailn); + return first_edge(tailn); } - Edge const* viterbi_edge(int node) const { // even if edges are sorted by edgeweights, this doesn't give viterbi + Edge const* first_edge(int node) const { // only actually viterbi if ViterbiSortInEdges() called. otherwise it's just the first. EdgesVector const& v=nodes_[node].in_edges_; return v.empty() ? 0 : &edges_[v.front()]; } - enum { - NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF - }; + Edge const* first_edge() const { + int nn=nodes_.size(); + return nn>=0?first_edge(nn-1):0; + } + +#if 0 // returns edge with rule_.IsGoal, returns 0 if none found. otherwise gives best edge_prob_ - note: I don't think edge_prob_ is viterbi cumulative, so this would just be the best local probability. Edge const* ViterbiGoalEdge() const; +#endif + + int GoalNode() const { return nodes_.size()-1; } // by definition, and sorting of nodes in topo order (bottom up) + +// post: in_edges_ for each node is ordered by global viterbi. returns 1best goal node edge as well + Edge const* ViterbiSortInEdges(); + Edge const* SortInEdgesByNodeViterbi(NodeProbs const& nv); + Edge const* ViterbiSortInEdges(EdgeProbs const& eviterbi); + + prob_t ComputeNodeViterbi(NodeProbs *np) const; + prob_t ComputeEdgeViterbi(EdgeProbs *ev) const; + prob_t ComputeEdgeViterbi(NodeProbs const&np,EdgeProbs *ev) const; void swap(Hypergraph& other) { other.nodes_.swap(nodes_); @@ -324,11 +352,6 @@ public: } } - typedef std::vector EdgeProbs; - typedef std::vector NodeProbs; - typedef std::vector EdgeMask; - typedef std::vector NodeMask; - // computes inside and outside scores for each // edge in the hypergraph // alpha->size = edges_.size = beta->size @@ -379,8 +402,6 @@ public: // contrary to PushWeightsToGoal, use viterbi semiring; store log(p) to fid. note that p_viterbi becomes 1; k*p_viterbi becomes k. also modifies edge_prob_ (note that the fid stored log(p) will stick around even if you reweight) // afterwards, product of edge_prob_ for a derivation will equal 1 for the viterbi (p_v before, 1 after), and in general (k*p_v before, k after). returns inside(goal) prob_t PushViterbiWeightsToGoal(int fid=0); - prob_t ComputeNodeViterbi(NodeProbs *np) const; - // void SortInEdgesByEdgeWeights(); // local sort only - pretty useless diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index b21139df..6bcc97db 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -5,6 +5,8 @@ #include #include "hg.h" +#define DEBUG_VITERBI_SORT + using namespace std; std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree,bool show_derivation) @@ -25,7 +27,12 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es o << hg.show_viterbi_tree(false); // last item should be goal (or at least depend on prev items). TODO: this doesn't actually reorder the nodes in hg. o<(hg).ViterbiSortInEdges(); + o<