summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgraehl@gmail.com <graehl@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-09 06:14:04 +0000
committergraehl@gmail.com <graehl@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-09 06:14:04 +0000
commitb96f8ca79c3293062a4b24266e07859cedc936c2 (patch)
treeef81550fb137d536f8ae64733d9da73be2417078
parentffd4acfe5b84f413c66af6ec5a76bdbc0d3aa9e8 (diff)
hg sort in_edges by viterbi, debug print temporarily enabled which always --show_derivation
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@494 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r--decoder/hg.cc70
-rw-r--r--decoder/hg.h45
-rw-r--r--decoder/viterbi.cc9
3 files changed, 106 insertions, 18 deletions
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 47924d99..1e2a0010 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -16,6 +16,7 @@
using namespace std;
+#if 0
Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const
{
Edge const* r=0;
@@ -26,6 +27,64 @@ Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const
}
return r;
}
+#endif
+
+Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges()
+{
+ NodeProbs nv;
+ ComputeNodeViterbi(&nv);
+ return SortInEdgesByNodeViterbi(nv);
+}
+
+Hypergraph::Edge const* Hypergraph::SortInEdgesByNodeViterbi(NodeProbs const& nv)
+{
+ EdgeProbs ev;
+ ComputeEdgeViterbi(nv,&ev);
+ return ViterbiSortInEdges(ev);
+}
+
+namespace {
+struct less_ve {
+ Hypergraph::EdgeProbs const& ev;
+ //Hypergraph const& hg;
+ explicit less_ve(Hypergraph::EdgeProbs const& ev/*,Hypergraph const& hg */) : ev(ev)/*,hg(hg)*/ { }
+ bool operator()(int a,int b) const {
+ return ev[a] > ev[b];
+ }
+};
+}
+
+Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges(EdgeProbs const& ev)
+{
+ for (int i=0;i<nodes_.size();++i) {
+ EdgesVector &ie=nodes_[i].in_edges_;
+ std::sort(ie.begin(),ie.end(),less_ve(ev));
+ }
+ return first_edge();
+}
+
+prob_t Hypergraph::ComputeEdgeViterbi(EdgeProbs *ev) const {
+ NodeProbs nv;
+ ComputeNodeViterbi(&nv);
+ return ComputeEdgeViterbi(nv,ev);
+}
+
+prob_t Hypergraph::ComputeEdgeViterbi(NodeProbs const& nv,EdgeProbs *ev) const {
+ for (int i=0;i<edges_.size();++i) {
+ Edge const& e=edges_[i];
+ prob_t r=e.edge_prob_;
+ TailNodeVector const& t=e.tail_nodes_;
+ for (TailNodeVector::const_iterator j=t.begin(),jj=t.end();j!=jj;++j)
+ r*=nv[*j];
+ /*
+ for (int j=0;j<e.tail_nodes_.size();++j)
+ r*=nv[e.tail_nodes_[j]];
+ */
+ (*ev)[i]=r;
+ }
+ return nv.empty()?prob_t(0):nv.back();
+}
+
std::string Hypergraph::stats(std::string const& name) const
{
@@ -659,14 +718,15 @@ struct EdgeWeightSorter {
}
*/
+std::string Hypergraph::show_first_tree(bool indent,int show_mask,int maxdepth,int depth) const {
+ EdgeHandle fe=first_edge();
+ return fe ? fe->derivation_tree(*this,fe,indent,show_mask,maxdepth,depth) : "";
+}
+
std::string Hypergraph::show_viterbi_tree(bool indent,int show_mask,int maxdepth,int depth) const {
HypergraphP v=CreateViterbiHypergraph();
// cerr<<viterbi_stats(*v,"debug show_viterbi_tree",true,true,false)<<endl;
- if (v->NumberOfEdges()) {
- Edge const* beste=&v->edges_.back(); //FIXME: this doesn't work. check CreateViterbiHypergraph ?
- return beste->derivation_tree(*v,beste,indent,show_mask,maxdepth,depth);
- }
- return std::string();
+ return v->show_first_tree(indent,show_mask,maxdepth,depth);
}
HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges) const {
diff --git a/decoder/hg.h b/decoder/hg.h
index f426d32f..e64db837 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -197,24 +197,52 @@ public:
}
};
+ typedef std::vector<prob_t> EdgeProbs;
+ typedef std::vector<prob_t> NodeProbs;
+ typedef std::vector<bool> EdgeMask;
+ typedef std::vector<bool> NodeMask;
+
std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
+// builds viterbi hg and returns it formatted as a pretty string
+
+ enum {
+ NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+ };
+
+ std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
+ // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first
typedef Edge const* EdgeHandle;
EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const {
- return viterbi_edge(tailn);
+ return first_edge(tailn);
}
- Edge const* viterbi_edge(int node) const { // even if edges are sorted by edgeweights, this doesn't give viterbi
+ Edge const* first_edge(int node) const { // only actually viterbi if ViterbiSortInEdges() called. otherwise it's just the first.
EdgesVector const& v=nodes_[node].in_edges_;
return v.empty() ? 0 : &edges_[v.front()];
}
- enum {
- NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
- };
+ Edge const* first_edge() const {
+ int nn=nodes_.size();
+ return nn>=0?first_edge(nn-1):0;
+ }
+
+#if 0
// returns edge with rule_.IsGoal, returns 0 if none found. otherwise gives best edge_prob_ - note: I don't think edge_prob_ is viterbi cumulative, so this would just be the best local probability.
Edge const* ViterbiGoalEdge() const;
+#endif
+
+ int GoalNode() const { return nodes_.size()-1; } // by definition, and sorting of nodes in topo order (bottom up)
+
+// post: in_edges_ for each node is ordered by global viterbi. returns 1best goal node edge as well
+ Edge const* ViterbiSortInEdges();
+ Edge const* SortInEdgesByNodeViterbi(NodeProbs const& nv);
+ Edge const* ViterbiSortInEdges(EdgeProbs const& eviterbi);
+
+ prob_t ComputeNodeViterbi(NodeProbs *np) const;
+ prob_t ComputeEdgeViterbi(EdgeProbs *ev) const;
+ prob_t ComputeEdgeViterbi(NodeProbs const&np,EdgeProbs *ev) const;
void swap(Hypergraph& other) {
other.nodes_.swap(nodes_);
@@ -324,11 +352,6 @@ public:
}
}
- typedef std::vector<prob_t> EdgeProbs;
- typedef std::vector<prob_t> NodeProbs;
- typedef std::vector<bool> EdgeMask;
- typedef std::vector<bool> NodeMask;
-
// computes inside and outside scores for each
// edge in the hypergraph
// alpha->size = edges_.size = beta->size
@@ -379,8 +402,6 @@ public:
// contrary to PushWeightsToGoal, use viterbi semiring; store log(p) to fid. note that p_viterbi becomes 1; k*p_viterbi becomes k. also modifies edge_prob_ (note that the fid stored log(p) will stick around even if you reweight)
// afterwards, product of edge_prob_ for a derivation will equal 1 for the viterbi (p_v before, 1 after), and in general (k*p_v before, k after). returns inside(goal)
prob_t PushViterbiWeightsToGoal(int fid=0);
- prob_t ComputeNodeViterbi(NodeProbs *np) const;
-
// void SortInEdgesByEdgeWeights(); // local sort only - pretty useless
diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc
index b21139df..6bcc97db 100644
--- a/decoder/viterbi.cc
+++ b/decoder/viterbi.cc
@@ -5,6 +5,8 @@
#include <vector>
#include "hg.h"
+#define DEBUG_VITERBI_SORT
+
using namespace std;
std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool estring, bool etree,bool show_derivation)
@@ -25,7 +27,12 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es
o << hg.show_viterbi_tree(false); // last item should be goal (or at least depend on prev items). TODO: this doesn't actually reorder the nodes in hg.
o<<endl;
}
-
+#ifdef DEBUG_VITERBI_SORT
+ const_cast<Hypergraph&>(hg).ViterbiSortInEdges();
+ o<<name<<" (viterbi sort) first derivation: ";
+ o<<hg.show_first_tree(false);
+ o<<endl;
+#endif
return o.str();
}