diff options
-rw-r--r-- | decoder/hg.cc | 70 | ||||
-rw-r--r-- | decoder/hg.h | 45 | ||||
-rw-r--r-- | decoder/viterbi.cc | 9 |
3 files changed, 106 insertions, 18 deletions
diff --git a/decoder/hg.cc b/decoder/hg.cc index 47924d99..1e2a0010 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -16,6 +16,7 @@ using namespace std; +#if 0 Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const { Edge const* r=0; @@ -26,6 +27,64 @@ Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const } return r; } +#endif + +Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges() +{ + NodeProbs nv; + ComputeNodeViterbi(&nv); + return SortInEdgesByNodeViterbi(nv); +} + +Hypergraph::Edge const* Hypergraph::SortInEdgesByNodeViterbi(NodeProbs const& nv) +{ + EdgeProbs ev; + ComputeEdgeViterbi(nv,&ev); + return ViterbiSortInEdges(ev); +} + +namespace { +struct less_ve { + Hypergraph::EdgeProbs const& ev; + //Hypergraph const& hg; + explicit less_ve(Hypergraph::EdgeProbs const& ev/*,Hypergraph const& hg */) : ev(ev)/*,hg(hg)*/ { } + bool operator()(int a,int b) const { + return ev[a] > ev[b]; + } +}; +} + +Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges(EdgeProbs const& ev) +{ + for (int i=0;i<nodes_.size();++i) { + EdgesVector &ie=nodes_[i].in_edges_; + std::sort(ie.begin(),ie.end(),less_ve(ev)); + } + return first_edge(); +} + +prob_t Hypergraph::ComputeEdgeViterbi(EdgeProbs *ev) const { + NodeProbs nv; + ComputeNodeViterbi(&nv); + return ComputeEdgeViterbi(nv,ev); +} + +prob_t Hypergraph::ComputeEdgeViterbi(NodeProbs const& nv,EdgeProbs *ev) const { + for (int i=0;i<edges_.size();++i) { + Edge const& e=edges_[i]; + prob_t r=e.edge_prob_; + TailNodeVector const& t=e.tail_nodes_; + for (TailNodeVector::const_iterator j=t.begin(),jj=t.end();j!=jj;++j) + r*=nv[*j]; + /* + for (int j=0;j<e.tail_nodes_.size();++j) + r*=nv[e.tail_nodes_[j]]; + */ + (*ev)[i]=r; + } + return nv.empty()?prob_t(0):nv.back(); +} + std::string Hypergraph::stats(std::string const& name) const { @@ -659,14 +718,15 @@ struct EdgeWeightSorter { } */ +std::string Hypergraph::show_first_tree(bool indent,int show_mask,int maxdepth,int depth) const { + EdgeHandle fe=first_edge(); + return fe ? fe->derivation_tree(*this,fe,indent,show_mask,maxdepth,depth) : ""; +} + std::string Hypergraph::show_viterbi_tree(bool indent,int show_mask,int maxdepth,int depth) const { HypergraphP v=CreateViterbiHypergraph(); // cerr<<viterbi_stats(*v,"debug show_viterbi_tree",true,true,false)<<endl; - if (v->NumberOfEdges()) { - Edge const* beste=&v->edges_.back(); //FIXME: this doesn't work. check CreateViterbiHypergraph ? - return beste->derivation_tree(*v,beste,indent,show_mask,maxdepth,depth); - } - return std::string(); + return v->show_first_tree(indent,show_mask,maxdepth,depth); } HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges) const { diff --git a/decoder/hg.h b/decoder/hg.h index f426d32f..e64db837 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -197,24 +197,52 @@ public: } }; + typedef std::vector<prob_t> EdgeProbs; + typedef std::vector<prob_t> NodeProbs; + typedef std::vector<bool> EdgeMask; + typedef std::vector<bool> NodeMask; + std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; +// builds viterbi hg and returns it formatted as a pretty string + + enum { + NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF + }; + + std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; + // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first typedef Edge const* EdgeHandle; EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const { - return viterbi_edge(tailn); + return first_edge(tailn); } - Edge const* viterbi_edge(int node) const { // even if edges are sorted by edgeweights, this doesn't give viterbi + Edge const* first_edge(int node) const { // only actually viterbi if ViterbiSortInEdges() called. otherwise it's just the first. EdgesVector const& v=nodes_[node].in_edges_; return v.empty() ? 0 : &edges_[v.front()]; } - enum { - NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF - }; + Edge const* first_edge() const { + int nn=nodes_.size(); + return nn>=0?first_edge(nn-1):0; + } + +#if 0 // returns edge with rule_.IsGoal, returns 0 if none found. otherwise gives best edge_prob_ - note: I don't think edge_prob_ is viterbi cumulative, so this would just be the best local probability. Edge const* ViterbiGoalEdge() const; +#endif + + int GoalNode() const { return nodes_.size()-1; } // by definition, and sorting of nodes in topo order (bottom up) + +// post: in_edges_ for each node is ordered by global viterbi. returns 1best goal node edge as well + Edge const* ViterbiSortInEdges(); + Edge const* SortInEdgesByNodeViterbi(NodeProbs const& nv); + Edge const* ViterbiSortInEdges(EdgeProbs const& eviterbi); + + prob_t ComputeNodeViterbi(NodeProbs *np) const; + prob_t ComputeEdgeViterbi(EdgeProbs *ev) const; + prob_t ComputeEdgeViterbi(NodeProbs const&np,EdgeProbs *ev) const; void swap(Hypergraph& other) { other.nodes_.swap(nodes_); @@ -324,11 +352,6 @@ public: } } - typedef std::vector<prob_t> EdgeProbs; - typedef std::vector<prob_t> NodeProbs; - typedef std::vector<bool> EdgeMask; - typedef std::vector<bool> NodeMask; - // computes inside and outside scores for each // edge in the hypergraph // alpha->size = edges_.size = beta->size @@ -379,8 +402,6 @@ public: // contrary to PushWeightsToGoal, use viterbi semiring; store log(p) to fid. note that p_viterbi becomes 1; k*p_viterbi becomes k. also modifies edge_prob_ (note that the fid stored log(p) will stick around even if you reweight) // afterwards, product of edge_prob_ for a derivation will equal 1 for the viterbi (p_v before, 1 after), and in general (k*p_v before, k after). returns inside(goal) prob_t PushViterbiWeightsToGoal(int fid=0); - prob_t ComputeNodeViterbi(NodeProbs *np) const; - // void SortInEdgesByEdgeWeights(); // local sort only - pretty useless diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index b21139df..6bcc97db 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -5,6 +5,8 @@ #include <vector> #include "hg.h" +#define DEBUG_VITERBI_SORT + using namespace std; std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree,bool show_derivation) @@ -25,7 +27,12 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es o << hg.show_viterbi_tree(false); // last item should be goal (or at least depend on prev items). TODO: this doesn't actually reorder the nodes in hg. o<<endl; } - +#ifdef DEBUG_VITERBI_SORT + const_cast<Hypergraph&>(hg).ViterbiSortInEdges(); + o<<name<<" (viterbi sort) first derivation: "; + o<<hg.show_first_tree(false); + o<<endl; +#endif return o.str(); } |