5 files changed, 83 insertions, 127 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index a69a6d05..47b298b9 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -871,13 +871,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
     if (rp.fid_summary) {
       if (summary_feature_type == kEDGE_PROB) {
         const prob_t z = forest.PushWeightsToGoal(1.0);
-        if (!isfinite(log(z)) || isnan(log(z))) {
+        if (!std::isfinite(log(z)) || std::isnan(log(z))) {
           cerr << "  " << passtr << " !!! Invalid partition detected, abandoning.\n";
         } else {
           for (int i = 0; i < forest.edges_.size(); ++i) {
             const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge
                                                                               // head node by forest.PushWeightsToGoal
-            if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) {
+            if (!std::isfinite(log_prob_transition) || std::isnan(log_prob_transition)) {
               cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl;
               abort();
             }
@@ -889,7 +889,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
       } else if (summary_feature_type == kNODE_RISK) {
         Hypergraph::EdgeProbs posts;
         const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
-        if (!isfinite(log(z)) || isnan(log(z))) {
+        if (!std::isfinite(log(z)) || std::isnan(log(z))) {
           cerr << "  " << passtr << " !!! Invalid partition detected, abandoning.\n";
         } else {
           for (int i = 0; i < forest.nodes_.size(); ++i) {
@@ -898,7 +898,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
             for (int j = 0; j < in_edges.size(); ++j)
               node_post += (posts[in_edges[j]] / z);
             const double log_np = log(node_post);
-            if (!isfinite(log_np) || isnan(log_np)) {
+            if (!std::isfinite(log_np) || std::isnan(log_np)) {
               cerr << "got bad posterior prob for node " << i << endl;
               abort();
             }
@@ -913,13 +913,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
       } else if (summary_feature_type == kEDGE_RISK) {
         Hypergraph::EdgeProbs posts;
         const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
-        if (!isfinite(log(z)) || isnan(log(z))) {
+        if (!std::isfinite(log(z)) || std::isnan(log(z))) {
           cerr << "  " << passtr << " !!! Invalid partition detected, abandoning.\n";
         } else {
           assert(posts.size() == forest.edges_.size());
           for (int i = 0; i < posts.size(); ++i) {
             const double log_np = log(posts[i] / z);
-            if (!isfinite(log_np) || isnan(log_np)) {
+            if (!std::isfinite(log_np) || std::isnan(log_np)) {
               cerr << "got bad posterior prob for node " << i << endl;
               abort();
             }
@@ -1090,7 +1090,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
           cerr << "DIFF. ERR! log_z < log_ref_z: " << log_z << " " << log_ref_z << endl;
           exit(1);
         }
-        assert(!isnan(log_ref_z));
+        assert(!std::isnan(log_ref_z));
         ref_exp -= full_exp;
         acc_vec += ref_exp;
         acc_obj += (log_z - log_ref_z);
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 557e0b5f..008fcad4 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -175,7 +175,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
                                  Hypergraph::Edge* edge,
                                  FFState* context,
                                  prob_t* combination_cost_estimate) const {
-  edge->reset_info();
+  //edge->reset_info();
   context->resize(state_size_);
   if (state_size_ > 0) {
     memset(&(*context)[0], 0, state_size_);
@@ -203,7 +203,7 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
 
 void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const {
   assert(1 == edge->rule_->Arity());
-  edge->reset_info();
+  //edge->reset_info();
   for (int i = 0; i < models_.size(); ++i) {
     const FeatureFunction& ff = *models_[i];
     const void* ant_state = NULL;
diff --git a/decoder/hg.h b/decoder/hg.h
index 6d67f2fa..f53d2fd2 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -33,47 +33,20 @@
 // slow
 #undef HG_EDGES_TOPO_SORTED
 
-class Hypergraph;
-typedef boost::shared_ptr<Hypergraph> HypergraphP;
-
-// class representing an acyclic hypergraph
-//  - edges have 1 head, 0..n tails
-class Hypergraph {
-public:
-  Hypergraph() : is_linear_chain_(false) {}
+// SmallVector is a fast, small vector<int> implementation for sizes <= 2
+typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
+typedef std::vector<int> EdgesVector; // indices in edges_
 
-  // SmallVector is a fast, small vector<int> implementation for sizes <= 2
-  typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
-  typedef std::vector<int> EdgesVector; // indices in edges_
-
-  // TODO get rid of cat_?
-  // TODO keep cat_ and add span and/or state? :)
-  struct Node {
-    Node() : id_(), cat_() {}
-    int id_; // equal to this object's position in the nodes_ vector
-    WordID cat_;  // non-terminal category if <0, 0 if not set
-    WordID NT() const { return -cat_; }
-    EdgesVector in_edges_;   // an in edge is an edge with this node as its head.  (in edges come from the bottom up to us)  indices in edges_
-    EdgesVector out_edges_;  // an out edge is an edge with this node as its tail.  (out edges leave us up toward the top/goal). indices in edges_
-    void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
-      cat_=o.cat_;
-    }
-    void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
-      copy_fixed(o);
-      id_=n2[id_];
-      e2.reindex_push_back(o.in_edges_,in_edges_);
-      e2.reindex_push_back(o.out_edges_,out_edges_);
-    }
-  };
+enum {
+  NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+};
 
+namespace HG {
 
-  // TODO get rid of edge_prob_? (can be computed on the fly as the dot
-  // product of the weight vector and the feature values)
   struct Edge {
-//    int poplimit; //TODO: cube pruning per edge limit?  per node didn't work well at all.  also, inside cost + outside(node) is the same information i'd use to set a per-edge limit anyway - and nonmonotonicity in cube pruning may mean it's good to favor edge (in same node) w/ relatively worse score
     Edge() : i_(-1), j_(-1), prev_i_(-1), prev_j_(-1) {}
     Edge(int id,Edge const& copy_pod_from) : id_(id) { copy_pod(copy_pod_from); } // call copy_features yourself later.
-    Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after
+    Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than sett
       : tail_nodes_(tail),id_(id) { copy_pod(copy_from);copy_features(copy_from); }
     inline int Arity() const { return tail_nodes_.size(); }
     int head_node_;               // refers to a position in nodes_
@@ -83,8 +56,6 @@ public:
     prob_t edge_prob_;             // dot product of weights and feat_values
     int id_;   // equal to this object's position in the edges_ vector
 
-    //FIXME: these span ids belong in Node, not Edge, right?  every node should have the same spans.
-
     // span info. typically, i_ and j_ refer to indices in the source sentence.
     // In synchronous parsing, i_ and j_ will refer to target sentence/lattice indices
     // while prev_i_ prev_j_ will refer to positions in the source.
@@ -97,54 +68,6 @@ public:
     short int j_;
     short int prev_i_;
     short int prev_j_;
-
-    void copy_info(Edge const& o) {
-#if USE_INFO_EDGE
-      set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space).  it's empty if nobody put any info there.
-#else
-      (void) o;
-#endif
-    }
-    void copy_pod(Edge const& o) {
-      rule_=o.rule_;
-      i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
-    }
-    void copy_features(Edge const& o) {
-      feature_values_=o.feature_values_;
-      copy_info(o);
-    }
-    void copy_fixed(Edge const& o) {
-      copy_pod(o);
-      copy_features(o);
-      edge_prob_ = o.edge_prob_;
-    }
-    void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
-      copy_fixed(o);
-      head_node_=n2[o.head_node_];
-      id_=e2[o.id_];
-      n2.reindex_push_back(o.tail_nodes_,tail_nodes_);
-    }
-
-#if USE_INFO_EDGE
-    std::ostringstream info_;
-    void set_info(std::string const& s) {
-      info_.str(s);
-      info_.seekp(0,std::ios_base::end);
-    }
-    Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str(),std::ios_base::ate) {
-//      info_.seekp(0,std::ios_base::end);
- }
-    void operator=(Edge const& o) {
-      head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
-      set_info(o.info_.str());
-    }
-    std::string info() const { return info_.str(); }
-    void reset_info() { info_.str(""); info_.clear(); }
-#else
-    std::string info() const { return std::string(); }
-    void reset_info() {  }
-    void set_info(std::string const& ) {  }
-#endif
     void show(std::ostream &o,unsigned mask=SPAN|RULE) const {
       o<<'{';
       if (mask&CATEGORY)
@@ -159,10 +82,6 @@ public:
         o<<' '<<feature_values_;
       if (mask&RULE)
         o<<' '<<rule_->AsString(mask&RULE_LHS);
-      if (USE_INFO_EDGE) {
-        std::string const& i=info();
-        if (mask&&!i.empty()) o << " |||"<<i; // remember, the initial space is expected as part of i
-      }
       o<<'}';
     }
     std::string show(unsigned mask=SPAN|RULE) const {
@@ -170,12 +89,28 @@ public:
       show(o,mask);
       return o.str();
     }
-    /* generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
-
-       FIXME: make kbest create a simple derivation-tree structure (could be a
-       hg), and replace the list-of-edges viterbi.h with a tree-structured one.
-       CreateViterbiHypergraph can do for 1best, though.
-    */
+    void copy_pod(Edge const& o) {
+      rule_=o.rule_;
+      i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_;
+    }
+    void copy_features(Edge const& o) {
+      feature_values_=o.feature_values_;
+    }
+    void copy_fixed(Edge const& o) {
+      copy_pod(o);
+      copy_features(o);
+      edge_prob_ = o.edge_prob_;
+    }
+    void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) {
+      copy_fixed(o);
+      head_node_=n2[o.head_node_];
+      id_=e2[o.id_];
+      n2.reindex_push_back(o.tail_nodes_,tail_nodes_);
+    }
+    // generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle)
+    //   FIXME: make kbest create a simple derivation-tree structure (could be a
+    //   hg), and replace the list-of-edges viterbi.h with a tree-structured one.
+    //   CreateViterbiHypergraph can do for 1best, though.
     template <class EdgeRecurse,class TEdgeHandle>
     std::string derivation_tree(EdgeRecurse const& re,TEdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const {
       std::ostringstream o;
@@ -203,7 +138,43 @@ public:
     }
   };
 
-  // all this info ought to live in Node, but for some reason it's on Edges.
+  // TODO get rid of cat_?
+  // TODO keep cat_ and add span and/or state? :)
+  struct Node {
+    Node() : id_(), cat_() {}
+    int id_; // equal to this object's position in the nodes_ vector
+    WordID cat_;  // non-terminal category if <0, 0 if not set
+    WordID NT() const { return -cat_; }
+    EdgesVector in_edges_;   // an in edge is an edge with this node as its head.  (in edges come from the bottom up to us)  indices in edges_
+    EdgesVector out_edges_;  // an out edge is an edge with this node as its tail.  (out edges leave us up toward the top/goal). indices in edges_
+    void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
+      cat_=o.cat_;
+    }
+    void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
+      copy_fixed(o);
+      id_=n2[id_];
+      e2.reindex_push_back(o.in_edges_,in_edges_);
+      e2.reindex_push_back(o.out_edges_,out_edges_);
+    }
+  };
+
+} // namespace HG
+
+class Hypergraph;
+typedef boost::shared_ptr<Hypergraph> HypergraphP;
+// class representing an acyclic hypergraph
+//  - edges have 1 head, 0..n tails
+class Hypergraph {
+public:
+  Hypergraph() : is_linear_chain_(false) {}
+  typedef HG::Node Node;
+  typedef HG::Edge Edge;
+  typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_
+  typedef std::vector<int> EdgesVector; // indices in edges_
+  enum {
+    NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
+  };
+
   // except for stateful models that have split nt,span, this should identify the node
   void SetNodeOrigin(int nodeid,NTSpan &r) const {
     Node const &n=nodes_[nodeid];
@@ -230,18 +201,9 @@ public:
     }
     return s;
   }
-  // 0 if none, -TD index otherwise (just like in rule)
   WordID NodeLHS(int nodeid) const {
     Node const &n=nodes_[nodeid];
     return n.NT();
-    /*
-    if (!n.in_edges_.empty()) {
-      Edge const& e=edges_[n.in_edges_.front()];
-      if (e.rule_)
-        return -e.rule_->lhs_;
-    }
-    return 0;
-    */
   }
 
   typedef std::vector<prob_t> EdgeProbs;
@@ -250,14 +212,8 @@ public:
   typedef std::vector<bool> NodeMask;
 
   std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
-// builds viterbi hg and returns it formatted as a pretty string
-
-  enum {
-    NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF
-  };
 
   std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const;
-  // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first
 
   typedef Edge const* EdgeHandle;
   EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const {
@@ -334,7 +290,7 @@ public:
   Edge* AddEdge(Edge const& in_edge, const TailNodeVector& tail) {
     edges_.push_back(Edge(edges_.size(),in_edge));
     Edge* edge = &edges_.back();
-    edge->copy_features(in_edge);
+    edge->feature_values_ = in_edge.feature_values_;
     edge->tail_nodes_ = tail; // possibly faster than copying to Edge() constructed above then copying via push_back.  perhaps optimized it's the same.
     index_tails(*edge);
     return edge;
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index 3a68a429..8f604c89 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -392,8 +392,8 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses
         const Hypergraph::Edge& e = hg.edges_[hg.nodes_[i].out_edges_[j]];
         const string output = e.rule_->e_.size() ==2 ? Escape(TD::Convert(e.rule_->e_[1])) : EPS;
         double prob = log(e.edge_prob_);
-        if (isinf(prob)) { prob = -9e20; }
-        if (isnan(prob)) { prob = 0; }
+        if (std::isinf(prob)) { prob = -9e20; }
+        if (std::isnan(prob)) { prob = 0; }
         os << "('" << output << "'," << prob << "," << e.head_node_ - i << "),";
       }
       os << "),";
diff --git a/utils/weights.cc b/utils/weights.cc
index f56e2a20..575877b6 100644
--- a/utils/weights.cc
+++ b/utils/weights.cc
@@ -34,7 +34,7 @@ void Weights::InitFromFile(const string& filename,
     int weight_count = 0;
     bool fl = false;
     string buf;
-    weight_t val = 0;
+    double val = 0;
     while (in) {
       getline(in, buf);
       if (buf.size() == 0) continue;
@@ -53,7 +53,7 @@ void Weights::InitFromFile(const string& filename,
       if (feature_list) { feature_list->push_back(buf.substr(start, end - start)); }
       while(end < buf.size() && buf[end] == ' ') ++end;
       val = strtod(&buf.c_str()[end], NULL);
-      if (isnan(val)) {
+      if (std::isnan(val)) {
         cerr << FD::Convert(fid) << " has weight NaN!\n";
         abort();
       }
@@ -127,8 +127,8 @@ void Weights::InitSparseVector(const vector<weight_t>& dv,
 
 void Weights::SanityCheck(const vector<weight_t>& w) {
   for (unsigned i = 0; i < w.size(); ++i) {
-    assert(!isnan(w[i]));
-    assert(!isinf(w[i]));
+    assert(!std::isnan(w[i]));
+    assert(!std::isinf(w[i]));
   }
 }