11 files changed, 434 insertions, 83 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index d498c22c..b089f52e 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -231,6 +231,7 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
                                   const vector<bool>* edges) {
   bool fix_up_src_spans = false;
   const Hypergraph* g = &in_g;
+  HypergraphP new_hg;
   if (!src_lattice.IsSentence() ||
       !trg_lattice.IsSentence()) {
     if (map_instead_of_viterbi) {
@@ -240,10 +241,10 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
     fix_up_src_spans = !src_lattice.IsSentence();
   }
   if (!map_instead_of_viterbi || edges) {
-    Hypergraph* new_hg = in_g.CreateViterbiHypergraph(edges);
+    new_hg = in_g.CreateViterbiHypergraph(edges);
     for (int i = 0; i < new_hg->edges_.size(); ++i)
       new_hg->edges_[i].edge_prob_ = prob_t::One();
-    g = new_hg;
+    g = new_hg.get();
   }
 
   vector<prob_t> edge_posteriors(g->edges_.size(), prob_t::Zero());
@@ -293,7 +294,8 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
       }
     }
   }
-  if (g != &in_g) { delete g; g = NULL; }
+  new_hg.reset();
+  //if (g != &in_g) { g.reset(); }
 
   prob_t threshold(0.9);
   const bool use_soft_threshold = true; // TODO configure
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 9110a234..bc1c348d 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -102,7 +102,7 @@ void InitCommandLine(int argc, char** argv, OracleBleu &ob, po::variables_map* c
     ("prelm_weights",po::value<string>(),"Feature weights file for prelm_beam_prune.  Requires --weights.")
     ("prelm_copy_weights","use --weights as value for --prelm_weights.")
     ("prelm_feature_function",po::value<vector<string> >()->composing(),"Additional feature functions for prelm pass only (in addition to the 0-state subset of feature_function")
-    ("keep_prelm_cube_order","when forest rescoring with final models, use the edge ordering from the prelm pruning features*weights.  only meaningful if --prelm_weights given.  UNTESTED but assume that cube pruning gives a sensible result, and that 'good' (as tuned for bleu w/ prelm features) edges come first.")
+    ("keep_prelm_cube_order","DEPRECATED (always enabled).  when forest rescoring with final models, use the edge ordering from the prelm pruning features*weights.  only meaningful if --prelm_weights given.  UNTESTED but assume that cube pruning gives a sensible result, and that 'good' (as tuned for bleu w/ prelm features) edges come first.")
     ("warn_0_weight","Warn about any feature id that has a 0 weight (this is perfectly safe if you intend 0 weight, though)")
         ("no_freeze_feature_set,Z", "Do not freeze feature set after reading feature weights file")
         ("feature_function,F",po::value<vector<string> >()->composing(), "Additional feature function(s) (-L for list)")
@@ -577,7 +577,6 @@ int main(int argc, char** argv) {
     if (has_prelm_models) {
       Timer t("prelm rescoring");
       forest.Reweight(prelm_feature_weights);
-      forest.SortInEdgesByEdgeWeights();
       Hypergraph prelm_forest;
       ApplyModelSet(forest,
                     smeta,
@@ -595,8 +594,6 @@ int main(int argc, char** argv) {
     if (has_late_models) {
       Timer t("Forest rescoring:");
       forest.Reweight(feature_weights);
-      if (!has_prelm_models || conf.count("keep_prelm_cube_order"))
-        forest.SortInEdgesByEdgeWeights();
       Hypergraph lm_forest;
       ApplyModelSet(forest,
                     smeta,
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index 20e7c5ca..f9f707d7 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -5,9 +5,12 @@
 
 #define FSA_FF_DEBUG
 #ifdef FSA_FF_DEBUG
-# define FSAFFDBG(x) do { if (debug) { std::cerr << x; }  } while(0)
+# define FSAFFDBG(e,x) do { if (debug) { FSADBGae(e,x) }  } while(0)
+# define FSAFFDBGnl(e) do { if (debug) { std::cerr<<std::endl; INFO_EDGE(e,"; "); } } while(0)
+
 #else
-# define FSAFFDBG(x)
+# define FSAFFDBG(e,x)
+# define FSAFFDBGnl(e)
 #endif
 
 /* regular bottom up scorer from Fsa feature
@@ -29,7 +32,6 @@ class FeatureFunctionFromFsa : public FeatureFunction {
 public:
   FeatureFunctionFromFsa(std::string const& param) : ff(param) {
     debug=true; // because factory won't set until after we construct.
-    FSAFFDBG(ff.name()<<" params="<<param<<" calling Init: ");
     Init();
   }
 
@@ -48,7 +50,7 @@ public:
                              FeatureVector* estimated_features,
                              void* out_state) const
   {
-    FSAFFDBG("(FromFsa) "<<name);
+    FSAFFDBG(edge,"(FromFsa) "<<name);
     ff.init_features(features); // estimated_features is fresh
     if (!ssz) {
       TRule const& rule=*edge.rule_;
@@ -57,11 +59,11 @@ public:
         if (e[j] < 1) { // variable
         } else {
           WordID ew=e[j];
-          FSAFFDBG(' '<<TD::Convert(ew));
+          FSAFFDBG(edge,' '<<TD::Convert(ew));
           ff.Scan(smeta,edge,ew,0,0,features);
         }
       }
-      FSAFFDBG('\n');
+      FSAFFDBGnl(edge);
       return;
     }
 
@@ -75,7 +77,7 @@ public:
     for (int j = 0; j < e.size(); ++j) { // items in target side of rule
       if (e[j] < 1) { // variable
         SP a = ant_contexts[-e[j]];
-        FSAFFDBG(' '<<describe_state(a));
+        FSAFFDBG(edge,' '<<describe_state(a));
         WP al=(WP)a;
         WP ale=left_end(a);
         // scan(al,le) these - the same as below else.  macro for now; pull into closure object later?
@@ -100,7 +102,7 @@ public:
           fsa.reset(fsa_state(a));
       } else { // single word
         WordID ew=e[j];
-        FSAFFDBG(' '<<TD::Convert(ew));
+        FSAFFDBG(edge,' '<<TD::Convert(ew));
         // some redundancy: non-vectorized version of above handling of left words of child item
         if (left_out<left_full) {
           *left_out++=ew;
@@ -120,7 +122,8 @@ public:
       do { *left_out++=TD::none; } while(left_out<left_full); // none-terminate so left_end(out_state) will know how many words
     } else // or else store final right-state.  heuristic was already assigned
       fstatecpy(out_state,fsa.cs);
-    FSAFFDBG(" = " << describe_state(out_state)<<" "<<(*features)[ff.fid()]<<" h="<<(*estimated_features)[ff.fid()]<<'\n');
+    FSAFFDBG(edge," = " << describe_state(out_state)<<" "<<(*features)[ff.fid()]<<" h="<<(*estimated_features)[ff.fid()]);
+    FSAFFDBGnl(edge);
   }
 
   void print_state(std::ostream &o,void const*ant) const {
@@ -154,23 +157,24 @@ public:
     SP ss=ff.start_state();
     WP l=(WP)residual_state,lend=left_end(residual_state);
     SP rst=fsa_state(residual_state);
-    FSAFFDBG("(FromFsa) Final "<<name<< " before="<<*final_features);
+    FSAFFDBG(edge,"(FromFsa) Final "<<name<< " before="<<*final_features);
     if (lend==rst) { // implying we have an fsa state
       AccumFeatures(ff,smeta,edge,l,lend,final_features,ss); // e.g. <s> score(full left unscored phrase)
-      FSAFFDBG(" left: "<<ff.describe_state(ss)<<" -> "<<Sentence(l,lend));
+      FSAFFDBG(edge," left: "<<ff.describe_state(ss)<<" -> "<<Sentence(l,lend));
       AccumFeatures(ff,smeta,edge,begin(ends),end(ends),final_features,rst); // e.g. [ctx for last M words] score("</s>")
-      FSAFFDBG(" right: "<<ff.describe_state(rst)<<" -> "<<ends);
+      FSAFFDBG(edge," right: "<<ff.describe_state(rst)<<" -> "<<ends);
     } else { // all we have is a single short phrase < M words before adding ends
       int nl=lend-l;
       Sentence whole(ends.size()+nl);
       WordID *w=begin(whole);
       wordcpy(w,l,nl);
       wordcpy(w+nl,begin(ends),ends.size());
-      FSAFFDBG(" score whole sentence: "<<whole);
+      FSAFFDBG(edge," score whole sentence: "<<whole);
       // whole = left-words + end-phrase
       AccumFeatures(ff,smeta,edge,w,end(whole),final_features,ss);
     }
-    FSAFFDBG(" = "<<*final_features<<'\n');
+    FSAFFDBG(edge," = "<<*final_features);
+    FSAFFDBGnl(edge);
   }
 
   bool rule_feature() const {
@@ -195,7 +199,6 @@ private:
     state_offset=sizeof(WordID)*M;
     SetStateSize(ssz+state_offset);
     assert(!ssz == !M); // no fsa state <=> markov order 0
-    FSAFFDBG("order="<<M<<" fsa_state_offset="<<state_offset<<" fsa_state_bytes="<<ssz<<" ff_state_bytes="<<StateSize()<<'\n');
   }
   int M; // markov order (ctx len)
   FeatureFunctionFromFsa(); // not allowed.
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 16f3142c..f48fac60 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -14,11 +14,14 @@
 
 //#define FSA_DEBUG
 
+# define FSADBGae(e,x) std::cerr << x; INFO_EDGE(e,x);
 #ifdef FSA_DEBUG
 # include <iostream>
-# define FSADBG(x) do { if (d().debug()) { std::cerr << x; } } while(0)
+# define FSADBG(e,x) do { if (d().debug()) { FSADBGae(e,x) } } while(0)
+# define FSADBGnl(e) do { if (d().debug) { std::cerr<<std::endl; INFO_EDGE(e,"; "); } } while(0)
 #else
-# define FSADBG(x)
+# define FSADBG(e,x)
+# define FSADBGnl(e)
 #endif
 
 #include <boost/lexical_cast.hpp>
@@ -249,9 +252,10 @@ public:
 
   inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,FeatureVector *features) const {
     Impl const& im=d();
-    FSADBG("Scan "<<FD::Convert(im.fid_)<<" = "<<(*features)[im.fid_]<<" "<<im.state(st)<<" ->"<<TD::Convert(w)<<" ");
+    FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<(*features)[im.fid_]<<" "<<im.state(st)<<" ->"<<TD::Convert(w)<<" ");
     im.ScanT(smeta,edge,w,im.state(st),im.state(next_state),features);
-    FSADBG(im.state(next_state)<<" = "<<(*features)[im.fid_]<<std::endl);
+    FSADBG(edge,im.state(next_state)<<" = "<<(*features)[im.fid_]);
+    FSADBGnl(edge);
   }
 
 };
diff --git a/decoder/hg.cc b/decoder/hg.cc
index e7a86c5b..8292639b 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -397,6 +397,7 @@ void Hypergraph::RemoveNoncoaccessibleStates(int goal_node_id) {
   assert(goal_node_id >= 0);
   assert(goal_node_id < nodes_.size());
 
+  // I don't get it: does TopologicallySortNodesAndEdges not remove things that don't connect to goal_index?  it uses goal_index just to order things?  InsideOutside pruning can do this anyway (nearly infinite beam, viterbi semiring)
   // TODO finish implementation
   abort();
 }
@@ -614,15 +615,100 @@ struct EdgeWeightSorter {
     return hg.edges_[a].edge_prob_ > hg.edges_[b].edge_prob_;
   }
 };
-
-void Hypergraph::SortInEdgesByEdgeWeights() {
+/*
+  void Hypergraph::SortInEdgesByEdgeWeights() {
   for (int i = 0; i < nodes_.size(); ++i) {
-    Node& node = nodes_[i];
-    sort(node.in_edges_.begin(), node.in_edges_.end(), EdgeWeightSorter(*this));
+  Node& node = nodes_[i];
+  sort(node.in_edges_.begin(), node.in_edges_.end(), EdgeWeightSorter(*this));
+  }
+  }
+*/
+
+std::string Hypergraph::show_viterbi_tree(bool indent,int show_mask,int maxdepth,int depth) const {
+  HypergraphP v=CreateViterbiHypergraph();
+  //FIXME: remove dbg print, fix.
+  cerr<<viterbi_stats(*this,"debug show_viterbi_tree",true,true,false)<<endl;
+  if (v->NumberOfEdges()) {
+    Edge const* beste=&v->edges_.back();   //FIXME: this doesn't work. check CreateViterbiHypergraph ?
+    return beste->derivation_tree(*this,beste,indent,show_mask,maxdepth,depth);
+  }
+  return std::string();
+}
+
+HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges) const {
+  NodeMask kn;
+  return CreateEdgeSubset(keep_edges,kn);
+}
+
+HypergraphP Hypergraph::CreateNodeEdgeSubset(NodeMask const& keep_nodes,EdgeMask const&keep_edges) const {
+  indices_after n2(keep_nodes);
+  indices_after e2(keep_edges);
+  HypergraphP ret(new Hypergraph(n2.n_kept, e2.n_kept, is_linear_chain_));
+  Nodes &rn=ret->nodes_;
+  for (int i=0;i<nodes_.size();++i)
+    if (n2.keeping(i))
+      rn[n2[i]].copy_reindex(nodes_[i],n2,e2);
+  Edges &re=ret->edges_;
+  for (int i=0;i<edges_.size();++i)
+    if (e2.keeping(i))
+      re[e2[i]].copy_reindex(edges_[i],n2,e2);
+  return ret;
+}
+
+void Hypergraph::TightenEdgeMask(EdgeMask &ke,NodeMask const& kn) const
+{
+  for (int i = 0; i < edges_.size(); ++i) {
+    if (ke[i]) {
+      const Edge& edge = edges_[i];
+      TailNodeVector const& tails=edge.tail_nodes_;
+      for (int j = 0; j < edge.tail_nodes_.size(); ++j) {
+        if (!kn[tails[j]]) {
+          ke[i]=false;
+          goto next_edge;
+        }
+      }
+    }
+    next_edge:;
+  }
+}
+
+HypergraphP Hypergraph::CreateEdgeSubset(EdgeMask &keep_edges,NodeMask &kn) const {
+  kn.clear();
+  kn.resize(nodes_.size());
+  for (int n=0;n<nodes_.size();++n) { // this nested iteration gives us edges in topo order too
+    EdgesVector const& es=nodes_[n].in_edges_;
+    for (int i=0;i<es.size();++i) {
+      int ei=es[i];
+      const Edge& e = edges_[ei];
+      TailNodeVector const& tails=e.tail_nodes_;
+      for (int j=0;j<e.tail_nodes_.size();++j) {
+        if (!kn[tails[j]]) {
+          keep_edges[ei]=false;
+          goto next_edge;
+        }
+      }
+      kn[e.head_node_]=true;
+    next_edge: ;
+    }
   }
+  return CreateNodeEdgeSubset(kn,keep_edges);
+}
+
+void Hypergraph::set_ids() {
+  for (int i = 0; i < edges_.size(); ++i)
+    edges_[i].id_=i;
+  for (int i = 0; i < nodes_.size(); ++i)
+    nodes_[i].id_=i;
+}
+
+void Hypergraph::check_ids() {
+  for (int i = 0; i < edges_.size(); ++i)
+    assert(edges_[i].id_==i);
+  for (int i = 0; i < nodes_.size(); ++i)
+    assert(nodes_[i].id_==i);
 }
 
-Hypergraph* Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const {
+HypergraphP Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const {
   typedef ViterbiPathTraversal::Result VE;
   VE vit_edges;
   if (edges) {
@@ -631,18 +717,29 @@ Hypergraph* Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const
   } else {
     Viterbi(*this, &vit_edges, ViterbiPathTraversal() ,EdgeProb());
   }
+#if 0
+# if 1
+    check_ids();
+# else
+    set_ids();
+# endif
+    EdgeMask used(edges_.size());
+    for (int i = 0; i < vit_edges.size(); ++i)
+      used[vit_edges[i]->id_]=true;
+    return CreateEdgeSubset(used);
+#else
   map<int, int> old2new_node;
   int num_new_nodes = 0;
   for (int i = 0; i < vit_edges.size(); ++i) {
     const Edge& edge = *vit_edges[i];
-    for (int j = 0; j < edge.tail_nodes_.size(); ++j)
-      assert(old2new_node.count(edge.tail_nodes_[j]) > 0);
+    for (int j = 0; j < edge.tail_nodes_.size(); ++j) assert(old2new_node.count(edge.tail_nodes_[j]) > 0);
     if (old2new_node.count(edge.head_node_) == 0) {
       old2new_node[edge.head_node_] = num_new_nodes;
       ++num_new_nodes;
     }
   }
-  Hypergraph* out = new Hypergraph(num_new_nodes, vit_edges.size(), is_linear_chain_);
+  HypergraphP ret(new Hypergraph(num_new_nodes, vit_edges.size(), is_linear_chain_));
+  Hypergraph* out = ret.get();
   for (map<int, int>::iterator it = old2new_node.begin();
        it != old2new_node.end(); ++it) {
     const Node& old_node = nodes_[it->first];
@@ -665,6 +762,7 @@ Hypergraph* Hypergraph::CreateViterbiHypergraph(const vector<bool>* edges) const
       out->nodes_[new_tail_node].out_edges_.push_back(i);
     }
   }
-  return out;
+#endif
+  return ret;
 }
 
diff --git a/decoder/hg.h b/decoder/hg.h
index 34638e04..90ae0935 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -1,11 +1,13 @@
 #ifndef _HG_H_
 #define _HG_H_
 
+
+//FIXME: is the edge given to ffs the coarse (previous forest) edge?  if so, then INFO_EDGE is effectively not working.  supposed to have logging associated with each edge and see how it fits together in kbest afterwards.
 #define USE_INFO_EDGE 1
 #if USE_INFO_EDGE
 # include <sstream>
-# define INFO_EDGE(e,msg) do { std::ostringstream &o=e.info_;o<<msg; } while(0)
-# define INFO_EDGEw(e,msg) do { std::ostringstream &o=e.info_;if (o.empty()) o<<' ';o<<msg; } while(0)
+# define INFO_EDGE(e,msg) do { std::ostringstream &o=const_cast<std::ostringstream &>(e.info_);o<<msg; } while(0)
+# define INFO_EDGEw(e,msg) do { std::ostringstream &o=const_cast<std::ostringstream &>(e.info_);if (o.empty()) o<<' ';o<<msg; } while(0)
 #else
 # define INFO_EDGE(e,msg)
 # define INFO_EDGEw(e,msg)
@@ -14,6 +16,7 @@
 
 #include <string>
 #include <vector>
+#include <boost/shared_ptr.hpp>
 
 #include "feature_vector.h"
 #include "small_vector.h"
@@ -21,16 +24,20 @@
 #include "tdict.h"
 #include "trule.h"
 #include "prob.h"
+#include "indices_after.h"
 
 // if you define this, edges_ will be sorted
 // (normally, just nodes_ are - root must be nodes_.back()), but this can be quite
 // slow
 #undef HG_EDGES_TOPO_SORTED
 
+class Hypergraph;
+typedef boost::shared_ptr<Hypergraph> HypergraphP;
+
 // class representing an acyclic hypergraph
 //  - edges have 1 head, 0..n tails
 class Hypergraph {
- public:
+public:
   Hypergraph() : is_linear_chain_(false) {}
 
   // SmallVector is a fast, small vector<int> implementation for sizes <= 2
@@ -43,9 +50,20 @@ class Hypergraph {
     Node() : id_(), cat_(), promise(1) {}
     int id_; // equal to this object's position in the nodes_ vector
     WordID cat_;  // non-terminal category if <0, 0 if not set
-    std::vector<int> in_edges_;   // contents refer to positions in edges_
-    std::vector<int> out_edges_;  // contents refer to positions in edges_
+    EdgesVector in_edges_;   // contents refer to positions in edges_
+    EdgesVector out_edges_;  // contents refer to positions in edges_
     double promise; // set in global pruning; in [0,infty) so that mean is 1.  use: e.g. scale cube poplimit
+    void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
+      cat_=o.cat_;
+      promise=o.promise;
+    }
+    void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
+      copy_fixed(o);
+      id_=n2[id_];
+      e2.copy_to(in_edges_,o.in_edges_);
+      e2.copy_to(out_edges_,o.out_edges_);
+    }
+
   };
 
   // TODO get rid of edge_prob_? (can be computed on the fly as the dot
@@ -72,14 +90,30 @@ class Hypergraph {
     short int j_;
     short int prev_i_;
     short int prev_j_;
+
+    void copy_fixed(Edge const& o) {
+      rule_=o.rule_;
+      feature_values_ = o.feature_values_;
+      edge_prob_ = o.edge_prob_;
+      i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
+#if USE_INFO_EDGE
+      info_.str(o.info_.str());
+#endif
+    }
+    void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
+      copy_fixed(o);
+      head_node_=n2[o.head_node_];
+      id_=e2[o.id_];
+      n2.copy_to(tail_nodes_,o.tail_nodes_);
+    }
+
 #if USE_INFO_EDGE
- private:
-    std::ostringstream info_;
- public:
+    mutable std::ostringstream info_;
+
     Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str()) {  }
     void operator=(Edge const& o) {
       head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;  info_.str(o.info_.str());
-        }
+    }
     std::string info() const { return info_.str(); }
 #else
     std::string info() const { return std::string(); }
@@ -109,9 +143,47 @@ class Hypergraph {
       show(o,mask);
       return o.str();
     }
+    /* generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
+
+       FIXME: make kbest create a simple derivation-tree structure (could be a
+       hg), and replace the list-of-edges viterbi.h with a tree-structured one.
+       CreateViterbiHypergraph can do for 1best, though.
+    */
+    template <class EdgeRecurse,class EdgeHandle>
+    std::string derivation_tree(EdgeRecurse const& re,EdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
+      std::ostringstream o;
+      derivation_tree_stream(re,eh,o,indent,show_mask,maxdepth,depth);
+      return o.str();
+    }
+    template <class EdgeRecurse,class EdgeHandle>
+    void derivation_tree_stream(EdgeRecurse const& re,EdgeHandle const& eh,std::ostream &o,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
+      if (depth>maxdepth) return;
+      if (indent) for (int i=0;i<depth;++i) o<<' ';
+      o<<'(';
+      show(o,show_mask);
+      if (indent) o<<'\n';
+      for (int i=0;i<tail_nodes_.size();++i) {
+        EdgeHandle c=re(tail_nodes_[i],i,eh);
+        Edge const* cp=c;
+        if (cp) {
+          cp->derivation_tree_stream(re,c,o,indent,show_mask,maxdepth,depth+1);
+          if (!indent) o<<' ';
+        }
+      }
+      if (indent) for (int i=0;i<depth;++i) o<<' ';
+      o<<")";
+      if (indent) o<<"\n";
+    }
   };
 
-  Edge const* viterbi_edge(int node) const { // assumes sorting has best edge as first.  FIXME: check.  SortInEdgesByEdgeWeights appears to accomplish this
+  std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
+
+  typedef Edge const* EdgeHandle;
+  EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const {
+    return viterbi_edge(tailn);
+  }
+
+  Edge const* viterbi_edge(int node) const { // even if edges are sorted by edgeweights, this doesn't give viterbi
     EdgesVector const& v=nodes_[node].in_edges_;
     return v.empty() ? 0 : &edges_[v.front()];
   }
@@ -119,28 +191,6 @@ class Hypergraph {
   enum {
     NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
   };
-  std::string show_tree(Edge const& e, bool indent=true,unsigned show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
-    std::ostringstream o;
-    show_tree(o,e,show_mask,indent,maxdepth,depth);
-    return o.str();
-  }
-  void show_tree(std::ostream &o,Edge const& e,bool indent=true,unsigned show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
-    if (depth>maxdepth) return;
-    if (indent) for(int i=0;i<depth;++i) o<<' ';
-    o<<'(';
-    e.show(o,show_mask);
-    if (indent) o<<'\n';
-    for (int i=0;i<e.tail_nodes_.size();++i) {
-      Edge const *c=viterbi_edge(e.tail_nodes_[i]);
-      if (!c) continue;
-      assert(c); // leaf?
-      show_tree(o,*c,indent,show_mask,maxdepth,depth+1);
-      if (!indent) o<<' ';
-    }
-    if (indent) for(int i=0;i<depth;++i) o<<' ';
-    o<<")";
-    if (indent) o<<"\n";
-  }
 
   // returns edge with rule_.IsGoal, returns 0 if none found.  otherwise gives best edge_prob_ - note: I don't think edge_prob_ is viterbi cumulative, so this would just be the best local probability.
   Edge const* ViterbiGoalEdge() const;
@@ -220,6 +270,7 @@ class Hypergraph {
 
   typedef std::vector<prob_t> EdgeProbs;
   typedef std::vector<bool> EdgeMask;
+  typedef std::vector<bool> NodeMask;
 
   // computes inside and outside scores for each
   // edge in the hypergraph
@@ -231,10 +282,33 @@ class Hypergraph {
   // find the score of the very best path passing through each edge
   prob_t ComputeBestPathThroughEdges(EdgeProbs* posts) const;
 
+
+  /* for all of the below subsets, the hg Nodes must be topo sorted already*/
+
+  // keep_nodes is an output-only param, keep_edges is in-out (more may be pruned than you asked for if the tails can't be built)
+  HypergraphP CreateEdgeSubset(EdgeMask & keep_edges_in_out,NodeMask &keep_nodes_out) const;
+  HypergraphP CreateEdgeSubset(EdgeMask & keep_edges) const;
+  // node keeping is final (const), edge keeping is an additional constraint which will sometimes be made stricter (and output back to you) by the absence of nodes.  you may end up with some empty nodes.  that is, kept edges will be made consistent with kept nodes first (but empty nodes are allowed)
+  HypergraphP CreateNodeSubset(NodeMask const& keep_nodes,EdgeMask &keep_edges_in_out) const {
+    TightenEdgeMask(keep_edges_in_out,keep_nodes);
+    return CreateNodeEdgeSubset(keep_nodes,keep_edges_in_out);
+  }
+  HypergraphP CreateNodeSubset(NodeMask const& keep_nodes) const {
+    EdgeMask ke(edges_.size(),true);
+    return CreateNodeSubset(keep_nodes,ke);
+  }
+
+  void TightenEdgeMask(EdgeMask &ke,NodeMask const& kn) const;
+  // kept edges are consistent with kept nodes already:
+  HypergraphP CreateNodeEdgeSubset(NodeMask const& keep_nodes,EdgeMask const&keep_edges_in_out) const;
+  HypergraphP CreateNodeEdgeSubset(NodeMask const& keep_nodes) const;
+
+
   // create a new hypergraph consisting only of the nodes / edges
   // in the Viterbi derivation of this hypergraph
   // if edges is set, use the EdgeSelectEdgeWeightFunction
-  Hypergraph* CreateViterbiHypergraph(const EdgeMask* edges = NULL) const;
+  // NOTE: last edge/node index are goal
+  HypergraphP CreateViterbiHypergraph(const EdgeMask* edges = NULL) const;
 
   // move weights as near to the source as possible, resulting in a
   // stochastic automaton.  ONLY FUNCTIONAL FOR *LATTICES*.
@@ -246,7 +320,7 @@ class Hypergraph {
   // not just lattices
   void PushWeightsToGoal(double scale = 1.0);
 
-  void SortInEdgesByEdgeWeights();
+//  void SortInEdgesByEdgeWeights(); // local sort only - pretty useless
 
   void PruneUnreachable(int goal_node_id); // DEPRECATED
 
@@ -301,21 +375,28 @@ class Hypergraph {
   bool is_linear_chain_;
 
   // nodes_ is sorted in topological order
-  std::vector<Node> nodes_;
+  typedef std::vector<Node> Nodes;
+  Nodes nodes_;
   // edges_ is not guaranteed to be in any particular order
-  std::vector<Edge> edges_;
+  typedef std::vector<Edge> Edges;
+  Edges edges_;
 
   // reorder nodes_ so they are in topological order
   // source nodes at 0 sink nodes at size-1
   void TopologicallySortNodesAndEdges(int goal_idx,
                                       const EdgeMask* prune_edges = NULL);
- private:
+
+  void set_ids(); // resync edge,node .id_
+  void check_ids(); // assert that .id_ have been kept in sync
+
+private:
   Hypergraph(int num_nodes, int num_edges, bool is_lc) : is_linear_chain_(is_lc), nodes_(num_nodes), edges_(num_edges) {}
 
   static TRulePtr kEPSRule;
   static TRulePtr kUnaryRule;
 };
 
+
 // common WeightFunctions, map an edge -> WeightType
 // for generic Viterbi/Inside algorithms
 struct EdgeProb {
@@ -331,7 +412,7 @@ struct EdgeSelectEdgeWeightFunction {
     if (v_[e.id_]) return prob_t::One();
     else return prob_t::Zero();
   }
- private:
+private:
   const EdgeMask& v_;
 };
 
diff --git a/decoder/indices_after.h b/decoder/indices_after.h
new file mode 100755
index 00000000..ad94c576
--- /dev/null
+++ b/decoder/indices_after.h
@@ -0,0 +1,152 @@
+#ifndef INDICES_AFTER_REMOVING_H
+#define INDICES_AFTER_REMOVING_H
+
+#include <boost/config.hpp> // STATIC_CONSTANT
+#include <algorithm> //swap
+
+// iterator wrapper.  inverts boolean value.
+template <class AB>
+struct keep_not_marked
+{
+    typedef keep_not_marked<AB> self_type;
+    AB i;
+    bool operator *() const
+    {
+        return !*i;
+    }
+    void operator++()
+    {
+        ++i;
+    }
+    bool operator ==(self_type const& o)
+    {
+        return i==o.i;
+    }
+};
+
+// outputs sequence to out iterator, of new indices for each element i, corresponding to deleting element i from an array when remove[i] is true (-1 if it was deleted, new index otherwise), returning one-past-end of out (the return value = # of elements left after deletion)
+template <class KEEP,class KEEPe,class O>
+unsigned new_indices_keep(KEEP i, KEEPe end,O out) {
+    unsigned f=0;
+    while (i!=end)
+      *out++ = *i++ ? f++ : (unsigned)-1;
+    return f;
+};
+
+template <class It,class KeepIf,class O>
+unsigned new_indices_keep_if_n(unsigned n,It i,KeepIf const& r,O out)
+{
+    unsigned f=0;
+    while(n--)
+      *out++ = r(*i++) ? f++ : (unsigned)-1;
+    return f;
+}
+
+template <class KEEP,class O>
+unsigned new_indices(KEEP keep,O out) {
+  return new_indices(keep.begin(),keep.end(),out);
+}
+
+// given a vector and a parallel sequence of bools where true means keep, keep only the marked elements while maintaining order
+struct indices_after
+{
+  BOOST_STATIC_CONSTANT(unsigned,REMOVED=(unsigned)-1);
+  unsigned *map; // map[i] == REMOVED if i is deleted
+  unsigned n_kept;
+  unsigned n_mapped;
+  template <class AB,class ABe>
+  indices_after(AB i, ABe end) {
+    init(i,end);
+  }
+
+  template <class Vec,class R>
+  void init_keep_if(Vec v,R const& r)
+  {
+    n_mapped=v.size();
+    if ( !n_mapped ) return;
+    map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped);
+    n_kept=new_indices_keep_if_n(n_mapped,r,map);
+  }
+
+  template <class AB,class ABe>
+  void init(AB i, ABe end) {
+    n_mapped=end-i;
+    if (n_mapped>0) {
+      map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped);
+      n_kept=new_indices_keep(i,end,map);
+    } else
+      map=NULL;
+  }
+  template <class A>
+  void init(A const& a)
+  {
+    init(a.begin(),a.end());
+  }
+
+  template <class A>
+  indices_after(A const& a)
+  {
+    init(a.begin(),a.end());
+  }
+  indices_after() : n_mapped(0) {}
+  ~indices_after()
+  {
+    if (n_mapped)
+      ::operator delete((void*)map);
+  }
+  bool removing(unsigned i) const
+  {
+    return map[i] == REMOVED;
+  }
+  bool keeping(unsigned i) const
+  {
+    return map[i] != REMOVED;
+  }
+
+  unsigned operator[](unsigned i) const
+  {
+    return map[i];
+  }
+
+  template <class Vec>
+  void do_moves(Vec &v) const
+  {
+    assert(v.size()==n_mapped);
+    unsigned i=0;
+    for (;i<n_mapped&&keeping(i);++i) ;
+    for(;i<n_mapped;++i)
+      if (keeping(i))
+        v[map[i]]=v[i];
+    v.resize(n_kept);
+  }
+
+  template <class Vec>
+  void do_moves_swap(Vec &v) const
+  {
+    using std::swap;
+    assert(v.size()==n_mapped);
+    unsigned r=n_mapped;
+    unsigned i=0;
+    for (;i<n_mapped&&keeping(i);++i) ;
+    for(;i<n_mapped;++i)
+      if (keeping(i))
+        std::swap(v[map[i]],v[i]);
+    v.resize(n_kept);
+  }
+
+  template <class Vecto,class Vecfrom>
+  void copy_to(Vecto &to,Vecfrom const& v) const {
+    to.resize(n_kept);
+    for (unsigned i=0;i<n_mapped;++i)
+      if (keeping(i))
+        to[map[i]]=v[i];
+  }
+
+private:
+  indices_after(indices_after const& o)
+  {
+    map=NULL;
+  }
+};
+
+#endif
diff --git a/decoder/kbest.h b/decoder/kbest.h
index 35e79bcc..b6f55502 100644
--- a/decoder/kbest.h
+++ b/decoder/kbest.h
@@ -77,6 +77,24 @@ namespace KBest {
         return a->score > b->score;
       }
     };
+
+    struct EdgeHandle {
+      Derivation const* d;
+      explicit EdgeHandle(Derivation const* d) : d(d) {  }
+//      operator bool() const { return d->edge; }
+      operator Hypergraph::Edge const* () const { return d->edge; }
+//      const Hypergraph::Edge* operator ->() const { return d->edge; }
+    };
+
+//    typedef Derivation EdgeHandle; // will cause copying, below but not performance critical; change to actual handle if you want
+    EdgeHandle operator()(int t,int taili,EdgeHandle const& parent) const {
+      return EdgeHandle(nds[t].D[parent.d->j[taili]]);
+    }
+
+    std::string derivation_tree(Derivation const& d,bool indent=true,int show_mask=Hypergraph::SPAN|Hypergraph::RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
+      return d.edge->derivation_tree(*this,EdgeHandle(&d),indent,show_mask,maxdepth,depth);
+    }
+
     struct DerivationUniquenessHash {
       size_t operator()(const Derivation* d) const {
         size_t x = 5381;
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index 4a2cbbe5..fbb681e0 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -246,7 +246,6 @@ struct OracleBleu {
   void ReweightBleu(Hypergraph *dest_forest,double bleu_weight=-1.) {
     feature_weights_.set_value(0,bleu_weight);
 	dest_forest->Reweight(feature_weights_);
-//	dest_forest->SortInEdgesByEdgeWeights();
   }
 
   bool show_derivation;
@@ -272,7 +271,7 @@ struct OracleBleu {
       kbest_out<<endl<<flush;
       if (show_derivation) {
         deriv_out<<"\nsent_id="<<sent_id<<"\n";
-        forest.show_tree(cerr,*d->edge);
+        deriv_out<<kbest.derivation_tree(*d,true);
         deriv_out<<flush;
       }
     }
diff --git a/decoder/small_vector.h b/decoder/small_vector.h
index 4183cde2..7ed99e77 100644
--- a/decoder/small_vector.h
+++ b/decoder/small_vector.h
@@ -57,7 +57,8 @@ class SmallVector {
 
   SmallVector(const Self& o) : size_(o.size_) {
     if (size_ <= SV_MAX) {
-      for (int i = 0; i < SV_MAX; ++i) data_.vals[i] = o.data_.vals[i];
+      std::memcpy(data_.vals,o.data_.vals,size_*sizeof(T));
+//      for (int i = 0; i < size_; ++i) data_.vals[i] = o.data_.vals[i];
     } else {
       capacity_ = size_ = o.size_;
       data_.ptr = new T[capacity_];
diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc
index d0b7e6ec..7214c600 100644
--- a/decoder/viterbi.cc
+++ b/decoder/viterbi.cc
@@ -13,20 +13,16 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name, bool es
   if (estring) {
     vector<WordID> trans;
     const prob_t vs = ViterbiESentence(hg, &trans);
-    o<<name<<"       Viterbi: "<<log(vs)<<endl;
+    o<<name<<"  Viterbi logp: "<<log(vs)<<endl;
     o<<name<<"       Viterbi: "<<TD::GetString(trans)<<endl;
   }
   if (etree) {
     o<<name<<"          tree: "<<ViterbiETree(hg)<<endl;
   }
+  //FIXME: this doesn't work.
   if (show_derivation) {
     o<<name<<"          derivation: ";
-    ViterbiPathTraversal::Result d;
-    Viterbi<ViterbiPathTraversal>(hg, &d);
-    if (d.empty())
-      o<<"(empty viterbi hyperpath - no translation)";
-    else
-      hg.show_tree(o,*d.back(),false); // last item should be goal (or at least depend on prev items).  TODO: this doesn't actually reorder the nodes in hg.
+    o << hg.show_viterbi_tree(false); // last item should be goal (or at least depend on prev items).  TODO: this doesn't actually reorder the nodes in hg.
     o<<endl;
   }