author     graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-07-07 21:26:51 +0000
committer  graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-07-07 21:26:51 +0000
commit     c4cb48ad003de65f97e0a6013e9da4329c89faf1 (patch)
tree       1d96a2224375bfbb0a8fb97c975147d37a4e324d
parent     ffe002f8792dd8693c12e9bc6a7f715ca170acfc (diff)
safe hg pruning without needing an additional inside reachability pass (max-margin cutoff is less tight at the bottom of the derivation tree)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@181 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r--  decoder/ff_lm.cc    4
-rw-r--r--  decoder/hg.cc      29
-rw-r--r--  decoder/hg.h       32
-rw-r--r--  decoder/logval.h   16
4 files changed, 52 insertions, 29 deletions
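The substantive change is in hg.cc below: instead of pruning against a single cutoff plus an EPSILON fudge and then always rerunning the inside algorithm, MarginPrune starts with a cutoff a hair below the requested one and multiplies it by a "creep" factor barely above 1 for every edge. Because edges are in defined-before-use order, edges nearer the goal face a slightly stricter test, so a kept edge can never sit on top of a pruned one. A minimal standalone sketch of that idea, using plain doubles and hypothetical names rather than prob_t:

#include <cmath>
#include <vector>

// Sketch only: edge_margin[i] is the max-marginal (or posterior) of edge i,
// and edges are assumed to be in defined-before-use (topological) order.
std::vector<bool> margin_prune_mask(const std::vector<double>& edge_margin,
                                    double cutoff) {
  const int ne = static_cast<int>(edge_margin.size());
  const double barely_smaller = std::exp(-1e-6);                  // just under 1
  const double creep = std::pow(barely_smaller, -1.0 / (ne + 1)); // just over 1
  cutoff *= barely_smaller;      // begin slightly more permissive than asked
  std::vector<bool> prune(ne, false);
  for (int i = 0; i < ne; ++i) {
    cutoff *= creep;             // tighten a little for each edge toward the goal
    prune[i] = edge_margin[i] < cutoff;
  }
  // after ne steps the cutoff is still (just) below the requested value,
  // so the caller's density/beam setting is never exceeded
  return prune;
}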
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 03dc2054..e6f7912e 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -1,3 +1,5 @@
+//TODO: backoff wordclasses for named entity xltns, esp. numbers. e.g. digits -> @. ideally rule features would specify replacement lm tokens/classes
+
//TODO: extra int in state to hold "GAP" token is not needed. if there are less than (N-1) words, then null terminate the e.g. left words. however, this would mean treating gapless items differently. not worth the potential bugs right now.
//TODO: allow features to reorder by heuristic*weight the rules' terminal phrases (or of hyperedges'). if first pass has pruning, then compute over whole ruleset as part of heuristic
@@ -311,7 +313,7 @@ class LanguageModelImpl {
double sum=0;
for (;rend>rbegin;--rend) {
sum+=clamp(WordProb(rend[-1],rend));
- UNIDBG(","<<TD::Convert(rend[-1]));
+ UNIDBG(" "<<TD::Convert(rend[-1]));
}
UNIDBG(")="<<sum<<endl);
return sum;
diff --git a/decoder/hg.cc b/decoder/hg.cc
index b6b9d8bd..70831d3d 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -129,7 +129,7 @@ void Hypergraph::PushWeightsToGoal(double scale) {
}
struct EdgeExistsWeightFunction {
- EdgeExistsWeightFunction(const std::vector<bool>& prunes) : prunes_(prunes) {}
+ EdgeExistsWeightFunction(const vector<bool>& prunes) : prunes_(prunes) {}
bool operator()(const Hypergraph::Edge& edge) const {
return !prunes_[edge.id_];
}
@@ -137,7 +137,7 @@ struct EdgeExistsWeightFunction {
const vector<bool>& prunes_;
};
-void Hypergraph::PruneEdges(const std::vector<bool>& prune_edge, bool run_inside_algorithm) {
+void Hypergraph::PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorithm) {
assert(prune_edge.size() == edges_.size());
vector<bool> filtered = prune_edge;
@@ -175,18 +175,22 @@ void Hypergraph::PruneEdges(const std::vector<bool>& prune_edge, bool run_inside
TopologicallySortNodesAndEdges(nodes_.size() - 1, &filtered);
}
-void Hypergraph_finish_prune(Hypergraph &hg,vector<prob_t> const& io,double cutoff,vector<bool> const* preserve_mask,bool verbose=false)
+void Hypergraph::MarginPrune(vector<prob_t> const& io,prob_t cutoff,vector<bool> const* preserve_mask,bool safe_inside,bool verbose)
{
- const double EPSILON=1e-5;
+ const prob_t BARELY_SMALLER(-1e-6,false); // nearly 1; 1-epsilon
//TODO: //FIXME: if EPSILON is 0, then remnants (useless edges that don't connect to top? or top-connected but not bottom-up buildable/referenced?) are left in the hypergraph output that cause mr_vest_map to segfault. adding EPSILON probably just covers up the symptom by making it far less frequent; I imagine any time threshold is set by DensityPrune, cutoff is exactly equal to the io of several nodes, but because of how it's computed, some round slightly down vs. slightly up. probably the flaw is in PruneEdges.
- cutoff=cutoff-EPSILON;
- vector<bool> prune(hg.NumberOfEdges());
+ int ne=NumberOfEdges();
+ cutoff*=BARELY_SMALLER;
+ prob_t creep=BARELY_SMALLER.root(-(ne+1)); // start more permissive, then become less generous. this is barely more than 1. we want to do this because it's a disaster if something lower in a derivation tree is deleted, but the higher thing remains (unless safe_inside)
+
+ vector<bool> prune(ne);
if (verbose) {
if (preserve_mask) cerr << preserve_mask->size() << " " << prune.size() << endl;
cerr<<"Finishing prune for "<<prune.size()<<" edges; CUTOFF=" << cutoff << endl;
}
unsigned pc = 0;
for (int i = 0; i < io.size(); ++i) {
+ cutoff*=creep;
const bool prune_edge = (io[i] < cutoff);
if (prune_edge) {
++pc;
@@ -195,13 +199,15 @@ void Hypergraph_finish_prune(Hypergraph &hg,vector<prob_t> const& io,double cuto
}
if (verbose)
cerr << "Finished pruning; removed " << pc << "/" << io.size() << " edges\n";
- hg.PruneEdges(prune,true); // inside reachability check in case cutoff rounded down too much (probably redundant with EPSILON hack)
+ PruneEdges(prune,safe_inside); // inside reachability check in case cutoff rounded down too much (probably redundant with EPSILON hack)
}
void Hypergraph::DensityPruneInsideOutside(const double scale,
const bool use_sum_prod_semiring,
const double density,
- const vector<bool>* preserve_mask)
+ const vector<bool>* preserve_mask
+ , bool safe_inside
+ )
{
assert(density >= 1.0);
const int plen = ViterbiPathLength(*this);
@@ -221,14 +227,15 @@ void Hypergraph::DensityPruneInsideOutside(const double scale,
assert(edges_.size() == io.size());
vector<prob_t> sorted = io;
nth_element(sorted.begin(), sorted.begin() + rnum, sorted.end(), greater<prob_t>());
- Hypergraph_finish_prune(*this,io,sorted[rnum],preserve_mask);
+ MarginPrune(io,sorted[rnum],preserve_mask,safe_inside);
}
void Hypergraph::BeamPruneInsideOutside(
const double scale,
const bool use_sum_prod_semiring,
const double alpha,
- const vector<bool>* preserve_mask) {
+ const vector<bool>* preserve_mask
+ ,bool safe_inside) {
assert(alpha >= 0.0);
vector<prob_t> io(edges_.size());
if (use_sum_prod_semiring) {
@@ -240,7 +247,7 @@ void Hypergraph::BeamPruneInsideOutside(
prob_t best; // initializes to zero
for (int i = 0; i < io.size(); ++i)
if (io[i] > best) best = io[i];
- Hypergraph_finish_prune(*this,io,best*exp(-alpha),preserve_mask);
+ MarginPrune(io,best*prob_t::exp(-alpha),preserve_mask,safe_inside);
}
void Hypergraph::PrintGraphviz() const {
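When safe_inside is requested, PruneEdges still reruns the inside algorithm over the surviving edges (via EdgeExistsWeightFunction) to drop anything that is no longer bottom-up buildable. A rough standalone sketch of the guarantee that pass provides; the node/edge layout and names here are hypothetical, not cdec's:

#include <vector>

// Hypothetical edge record: the node it builds and the nodes it consumes.
struct SketchEdge {
  int head;
  std::vector<int> tails;
};

// Marks as pruned any surviving edge whose tail nodes can no longer be built,
// so no kept edge rests on a pruned subderivation. Assumes edges appear in
// defined-before-use order (every edge building a node precedes any use of it).
void inside_filter(const std::vector<SketchEdge>& edges,
                   int num_nodes,
                   std::vector<bool>* prune) {
  std::vector<bool> buildable(num_nodes, false);
  for (size_t i = 0; i < edges.size(); ++i) {
    if ((*prune)[i]) continue;
    bool ok = true;
    for (size_t t = 0; t < edges[i].tails.size(); ++t)
      if (!buildable[edges[i].tails[t]]) { ok = false; break; }
    if (ok) buildable[edges[i].head] = true;  // head is inside-reachable
    else (*prune)[i] = true;                  // edge lost a pruned child
  }
}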
diff --git a/decoder/hg.h b/decoder/hg.h
index e54b4bcc..ab90650c 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -132,20 +132,23 @@ class Hypergraph {
}
}
+ typedef std::vector<prob_t> EdgeProbs;
+ typedef std::vector<bool> EdgeMask;
+
// computes inside and outside scores for each
// edge in the hypergraph
// alpha->size = edges_.size = beta->size
// returns inside prob of goal node
prob_t ComputeEdgePosteriors(double scale,
- std::vector<prob_t>* posts) const;
+ EdgeProbs* posts) const;
// find the score of the very best path passing through each edge
- prob_t ComputeBestPathThroughEdges(std::vector<prob_t>* posts) const;
+ prob_t ComputeBestPathThroughEdges(EdgeProbs* posts) const;
// create a new hypergraph consisting only of the nodes / edges
// in the Viterbi derivation of this hypergraph
// if edges is set, use the EdgeSelectEdgeWeightFunction
- Hypergraph* CreateViterbiHypergraph(const std::vector<bool>* edges = NULL) const;
+ Hypergraph* CreateViterbiHypergraph(const EdgeMask* edges = NULL) const;
// move weights as near to the source as possible, resulting in a
// stochastic automaton. ONLY FUNCTIONAL FOR *LATTICES*.
@@ -164,20 +167,20 @@ class Hypergraph {
void RemoveNoncoaccessibleStates(int goal_node_id = -1);
// remove edges from the hypergraph if prune_edge[edge_id] is true
- // TODO need to investigate why this shouldn't be run for the forest trans
- // case. To investigate, change false to true and see where ftrans crashes
- //TODO: what does the above "TODO" comment mean? that PruneEdges can lead to a crash? or that run_inside_algorithm should be false? there definitely is an unsolved bug, see hg.cc - workaround added
- void PruneEdges(const std::vector<bool>& prune_edge, bool run_inside_algorithm = false);
+ // note: if run_inside_algorithm is false, then consumers may be unhappy if you pruned nodes that are built on by nodes that are kept.
+ void PruneEdges(const EdgeMask& prune_edge, bool run_inside_algorithm = false);
// for density>=1.0, keep this many times the edges needed for the 1best derivation
// if you don't know, use_sum_prod_semiring should be false
- void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,
- const std::vector<bool>* preserve_mask = NULL);
+ void DensityPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double density,const EdgeMask* preserve_mask = NULL,bool safe_inside=false);
+
+ /// drop edge i if edge_margin[i] < prune_below, unless preserve_mask[i]
+ void MarginPrune(EdgeProbs const& edge_margin,prob_t prune_below,EdgeMask const* preserve_mask=0,bool safe_inside=false,bool verbose=false);
+ // safe_inside: if true, a theoretically redundant (but practically important .001% of the time due to rounding error) inside pruning pass will happen after max-marginal pruning. if you don't do this, it's possible that the pruned hypergraph will contain outside-reachable (but not inside-buildable) nodes. that is, a parent will be kept whose children were pruned. if output, those forests may confuse (crash) e.g. mr_vest_map. however, if the hyperedges occur in defined-before-use (all edges with head h occur before h is used as a tail) order, then a grace margin for keeping edges that starts leniently and becomes more forbidding will make it impossible for this to occur, i.e. safe_inside=true is not needed.
// prunes any edge whose score on the best path taking that edge is more than alpha away
// from the score of the global best past (or the highest edge posterior)
- void BeamPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double alpha,
- const std::vector<bool>* preserve_mask = NULL);
+ void BeamPruneInsideOutside(const double scale, const bool use_sum_prod_semiring, const double alpha,const EdgeMask* preserve_mask = NULL,bool safe_inside=false);
// report nodes, edges, paths
std::string stats(std::string const& name="forest") const;
@@ -204,7 +207,7 @@ class Hypergraph {
// reorder nodes_ so they are in topological order
// source nodes at 0 sink nodes at size-1
void TopologicallySortNodesAndEdges(int goal_idx,
- const std::vector<bool>* prune_edges = NULL);
+ const EdgeMask* prune_edges = NULL);
private:
Hypergraph(int num_nodes, int num_edges, bool is_lc) : is_linear_chain_(is_lc), nodes_(num_nodes), edges_(num_edges) {}
@@ -219,13 +222,14 @@ struct EdgeProb {
};
struct EdgeSelectEdgeWeightFunction {
- EdgeSelectEdgeWeightFunction(const std::vector<bool>& v) : v_(v) {}
+ typedef std::vector<bool> EdgeMask;
+ EdgeSelectEdgeWeightFunction(const EdgeMask& v) : v_(v) {}
inline prob_t operator()(const Hypergraph::Edge& e) const {
if (v_[e.id_]) return prob_t::One();
else return prob_t::Zero();
}
private:
- const std::vector<bool>& v_;
+ const EdgeMask& v_;
};
struct ScaledEdgeProb {
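For callers, the hg.h interface above now takes the preserve mask and the new safe_inside flag as trailing arguments with defaults. A hedged usage sketch; the wrapper function, include path, and parameter values are illustrative, not part of this commit:

#include "hg.h"  // assumed include path within decoder/

// Illustrative only: prune a decoded forest two different ways.
void prune_forest(Hypergraph& forest, double density, double alpha) {
  // keep ~density times the edges of the 1-best derivation; max-marginals
  // (use_sum_prod_semiring=false), no extra inside pass (the creeping cutoff
  // already keeps the result bottom-up buildable)
  forest.DensityPruneInsideOutside(1.0, false, density, NULL,
                                   /*safe_inside=*/false);
  // drop edges whose best path is more than alpha below the global best,
  // and rerun the inside reachability check afterwards to be extra safe
  forest.BeamPruneInsideOutside(1.0, false, alpha, NULL,
                                /*safe_inside=*/true);
}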
diff --git a/decoder/logval.h b/decoder/logval.h
index 622b308e..457818e7 100644
--- a/decoder/logval.h
+++ b/decoder/logval.h
@@ -1,6 +1,8 @@
#ifndef LOGVAL_H_
#define LOGVAL_H_
+#define LOGVAL_CHECK_NEG_POW false
+
#include <iostream>
#include <cstdlib>
#include <cmath>
@@ -11,9 +13,12 @@ class LogVal {
public:
LogVal() : s_(), v_(-std::numeric_limits<T>::infinity()) {}
explicit LogVal(double x) : s_(std::signbit(x)), v_(s_ ? std::log(-x) : std::log(x)) {}
+ LogVal(double lnx,bool sign) : s_(sign),v_(lnx) {}
+ static LogVal<T> exp(T lnx) { return LogVal(lnx,false); }
+
static LogVal<T> One() { return LogVal(1); }
static LogVal<T> Zero() { return LogVal(); }
-
+ static LogVal<T> e() { return LogVal(1,false); }
void logeq(const T& v) { s_ = false; v_ = v; }
LogVal& operator+=(const LogVal& a) {
@@ -54,12 +59,13 @@ class LogVal {
}
LogVal& poweq(const T& power) {
+#if LOGVAL_CHECK_NEG_POW
if (s_) {
std::cerr << "poweq(T) not implemented when s_ is true\n";
std::abort();
- } else {
+ } else
+#endif
v_ *= power;
- }
return *this;
}
@@ -71,6 +77,10 @@ class LogVal {
return res;
}
+ LogVal root(const T& root) const {
+ return pow(1/root);
+ }
+
operator T() const {
if (s_) return -std::exp(v_); else return std::exp(v_);
}
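The logval.h additions give LogVal a direct log-space constructor, a static exp(), an e() constant, and root() defined as pow(1/r), which is what lets MarginPrune build BARELY_SMALLER and creep without leaving log space. A minimal self-contained imitation of those semantics (this is not cdec's LogVal, just the arithmetic it performs):

#include <cmath>
#include <iostream>

struct MiniLogVal {
  double v;                                    // stores ln(x); sign bit omitted here
  explicit MiniLogVal(double lnx) : v(lnx) {}
  static MiniLogVal exp(double lnx) { return MiniLogVal(lnx); }  // e^lnx
  MiniLogVal pow(double p) const { return MiniLogVal(v * p); }   // x^p
  MiniLogVal root(double r) const { return pow(1.0 / r); }       // x^(1/r)
  double value() const { return std::exp(v); }
};

int main() {
  MiniLogVal barely_smaller = MiniLogVal::exp(-1e-6);  // ~ 1 - 1e-6
  MiniLogVal creep = barely_smaller.root(-101.0);      // barely above 1 (ne = 100)
  std::cout << barely_smaller.value() << " " << creep.value() << "\n";
}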