From 606e3e38b8a830dbbe65963ebf6c5ce7866b7800 Mon Sep 17 00:00:00 2001
From: Chris Dyer <redpony@gmail.com>
Date: Tue, 18 Mar 2014 01:41:17 -0400
Subject: star function

---
 utils/Makefile.am    |  2 ++
 utils/exp_semiring.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 utils/logval.h       | 12 ++++++++++
 utils/star.h         | 12 ++++++++++
 4 files changed, 90 insertions(+)
 create mode 100644 utils/exp_semiring.h
 create mode 100644 utils/star.h

(limited to 'utils')
diff --git a/utils/Makefile.am b/utils/Makefile.am
index a22b6727..c0ce3509 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -27,6 +27,7 @@ libutils_a_SOURCES = \
   citycrc.h \
   corpus_tools.h \
   dict.h \
+  exp_semiring.h \
   fast_sparse_vector.h \
   fdict.h \
   feature_vector.h \
@@ -49,6 +50,7 @@ libutils_a_SOURCES = \
   show.h \
   small_vector.h \
   sparse_vector.h \
+  star.h \
   static_utoa.h \
   stringlib.h \
   tdict.h \
diff --git a/utils/exp_semiring.h b/utils/exp_semiring.h
new file mode 100644
index 00000000..7572ccf5
--- /dev/null
+++ b/utils/exp_semiring.h
@@ -0,0 +1,64 @@
+#ifndef _EXP_SEMIRING_H_
+#define _EXP_SEMIRING_H_
+
+#include <iostream>
+#include "star.h"
+
+// this file implements the first-order expectation semiring described
+// in Li & Eisner (EMNLP 2009)
+
+// requirements:
+//   RType * RType ==> RType
+//   PType * PType ==> PType
+//   RType * PType ==> RType
+// good examples:
+//   PType scalar, RType vector
+// BAD examples:
+//   PType vector, RType scalar
+template <class PType, class RType>
+struct PRPair {
+  PRPair() : p(), r() {}
+  // Inside algorithm requires that T(0) and T(1)
+  // return the 0 and 1 values of the semiring
+  explicit PRPair(double x) : p(x), r() {}
+  PRPair(const PType& p, const RType& r) : p(p), r(r) {}
+  PRPair& operator+=(const PRPair& o) {
+    p += o.p;
+    r += o.r;
+    return *this;
+  }
+  PRPair& operator*=(const PRPair& o) {
+    r = (o.r * p) + (o.p * r);
+    p *= o.p;
+    return *this;
+  }
+  PType p;
+  RType r;
+};
+
+template <class P, class R>
+std::ostream& operator<<(std::ostream& o, const PRPair<P,R>& x) {
+  return o << '<' << x.p << ", " << x.r << '>';
+}
+
+template <class P, class R>
+const PRPair<P,R> operator+(const PRPair<P,R>& a, const PRPair<P,R>& b) {
+  PRPair<P,R> result = a;
+  result += b;
+  return result;
+}
+
+template <class P, class R>
+const PRPair<P,R> operator*(const PRPair<P,R>& a, const PRPair<P,R>& b) {
+  PRPair<P,R> result = a;
+  result *= b;
+  return result;
+}
+
+template <class P, class R>
+const PRPair<P,R> star(const PRPair<P,R>& x) {
+  const P pstar = star(x.p);
+  return PRPair<P,R>(pstar, pstar * x.r * pstar);
+}
+
+#endif
diff --git a/utils/logval.h b/utils/logval.h
index ec1f6acd..7f1e1024 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -11,6 +11,7 @@
 #include <cassert>
 #include "semiring.h"
 #include "show.h"
+#include "star.h"
 
 //TODO: template for supporting negation or not - most uses are for nonnegative "probs" only; probably some 10-20% speedup available
 template <class T>
@@ -242,4 +243,15 @@ bool operator>=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
 template <class T>
 std::size_t hash_value(const LogVal<T>& x) { return x.hash_impl(); }
 
+template <class T>
+LogVal<T> star(LogVal<T> x) {
+  if (x.is_0()) return x;
+  if (x.v_ >= 0) {
+    x.v_ = std::numeric_limits<T>::infinity();
+  } else {
+    x.v_ = -log1p(-x.as_float());
+  }
+  return x;
+}
+
 #endif
diff --git a/utils/star.h b/utils/star.h
new file mode 100644
index 00000000..3295112c
--- /dev/null
+++ b/utils/star.h
@@ -0,0 +1,12 @@
+#ifndef _STAR_H_
+#define _STAR_H_
+
+template <typename T>
+T star(const T& x) {
+  if (!x) return T();
+  if (x > T(1)) return std::numeric_limits<T>::infinity();
+  if (x < -T(1)) return -std::numeric_limits<T>::infinity();
+  return T(1) / (T(1) - x);
+}
+
+#endif
-- 
cgit v1.2.3


From 55beb71dbbfe8421a8e8be9d2d7868a5d87d77d5 Mon Sep 17 00:00:00 2001
From: Chris Dyer <redpony@gmail.com>
Date: Tue, 18 Mar 2014 01:55:19 -0400
Subject: star bool

---
 utils/star.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'utils')

diff --git a/utils/star.h b/utils/star.h
index 3295112c..e7358ffa 100644
--- a/utils/star.h
+++ b/utils/star.h
@@ -1,6 +1,8 @@
 #ifndef _STAR_H_
 #define _STAR_H_
 
+// star(x) computes the infinite sum x^0 + x^1 + x^2 + ...
+
 template <typename T>
 T star(const T& x) {
   if (!x) return T();
@@ -9,4 +11,8 @@ T star(const T& x) {
   return T(1) / (T(1) - x);
 }
 
+bool star(bool x) {
+  return x;
+}
+
 #endif
-- 
cgit v1.2.3


From 2a9ee1febae6a63173f74ae24e2bfe439e409525 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu>
Date: Tue, 18 Mar 2014 02:05:25 -0400
Subject: chris edits

---
 corpus/support/tokenizer.pl | 4 ++++
 utils/exp_semiring.h        | 2 +-
 utils/logval.h              | 2 +-
 utils/star.h                | 4 ++--
 4 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'utils')

diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index 7771201f..f57bc87a 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -240,6 +240,10 @@ sub proc_token {
 	return $token;
     }
 
+    if($token =~ /^\d+(.\d+)+(亿|百万|万|千)?$/){
+	return $token;
+    }
+
     ## 1,234,345.34
     if($token =~ /^\d+(\.\d{3})*,\d+$/){
 	## number
diff --git a/utils/exp_semiring.h b/utils/exp_semiring.h
index 7572ccf5..26a22071 100644
--- a/utils/exp_semiring.h
+++ b/utils/exp_semiring.h
@@ -56,7 +56,7 @@ const PRPair<P,R> operator*(const PRPair<P,R>& a, const PRPair<P,R>& b) {
 }
 
 template <class P, class R>
-const PRPair<P,R> star(const PRPair<P,R>& x) {
+inline const PRPair<P,R> star(const PRPair<P,R>& x) {
   const P pstar = star(x.p);
   return PRPair<P,R>(pstar, pstar * x.r * pstar);
 }
diff --git a/utils/logval.h b/utils/logval.h
index 7f1e1024..0c9ee982 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -244,7 +244,7 @@ template <class T>
 std::size_t hash_value(const LogVal<T>& x) { return x.hash_impl(); }
 
 template <class T>
-LogVal<T> star(LogVal<T> x) {
+inline LogVal<T> star(LogVal<T> x) {
   if (x.is_0()) return x;
   if (x.v_ >= 0) {
     x.v_ = std::numeric_limits<T>::infinity();
diff --git a/utils/star.h b/utils/star.h
index e7358ffa..21977dc9 100644
--- a/utils/star.h
+++ b/utils/star.h
@@ -4,14 +4,14 @@
 // star(x) computes the infinite sum x^0 + x^1 + x^2 + ...
 
 template <typename T>
-T star(const T& x) {
+inline T star(const T& x) {
   if (!x) return T();
   if (x > T(1)) return std::numeric_limits<T>::infinity();
   if (x < -T(1)) return -std::numeric_limits<T>::infinity();
   return T(1) / (T(1) - x);
 }
 
-bool star(bool x) {
+inline bool star(bool x) {
   return x;
 }
 
-- 
cgit v1.2.3


From b9e6e7e24cc48021090b689e143288e2b7f2b5fc Mon Sep 17 00:00:00 2001
From: Chris Dyer <redpony@gmail.com>
Date: Mon, 7 Apr 2014 22:56:34 -0400
Subject: track node state for smarter union

---
 Makefile.am                       |   2 +-
 decoder/Makefile.am               |   1 +
 decoder/apply_models.cc           | 306 +++++++++++++++++-----------------
 decoder/bottom_up_parser.cc       |  10 ++
 decoder/decoder.cc                |  13 ++
 decoder/fst_translator.cc         |   6 +
 decoder/hg.cc                     |  47 ++----
 decoder/hg.h                      |  10 +-
 decoder/lexalign.cc               |   5 +
 decoder/lextrans.cc               |   5 +
 decoder/node_state_hash.h         |  36 ++++
 decoder/nt_span.h                 |   2 +-
 decoder/tagger.cc                 |   5 +
 decoder/tree2string_translator.cc |  14 +-
 mteval/Makefile.am                |   8 +-
 tests/tools/filter-stderr.pl      |   1 +
 utils/Makefile.am                 |   3 +-
 utils/hash.h                      |  21 +--
 utils/murmur_hash.h               | 186 ---------------------
 utils/murmur_hash3.cc             | 340 ++++++++++++++++++++++++++++++++++++++
 utils/murmur_hash3.h              |  67 ++++++++
 21 files changed, 699 insertions(+), 389 deletions(-)
 create mode 100644 decoder/node_state_hash.h
 delete mode 100644 utils/murmur_hash.h
 create mode 100644 utils/murmur_hash3.cc
 create mode 100644 utils/murmur_hash3.h

(limited to 'utils')

diff --git a/Makefile.am b/Makefile.am
index 598293d1..88327477 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -3,13 +3,13 @@
 # cyclic dependencies between these directories!
 SUBDIRS = \
   utils \
-  mteval \
   klm/util/double-conversion \
   klm/util \
   klm/util/stream \
   klm/lm \
   klm/lm/builder \
   klm/search \
+  mteval \
   decoder \
   training \
   word-aligner \
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 5c91fe65..c85f17ed 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -144,6 +144,7 @@ libcdec_a_SOURCES = \
   lattice.cc \
   lexalign.cc \
   lextrans.cc \
+  node_state_hash.h \
   tree_fragment.cc \
   tree_fragment.h \
   maxtrans_blunsom.cc \
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 9a8f60be..9f8bbead 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -19,6 +19,7 @@ namespace std { using std::tr1::unordered_map; using std::tr1::unordered_set; }
 
 #include <boost/functional/hash.hpp>
 
+#include "node_state_hash.h"
 #include "verbose.h"
 #include "hg.h"
 #include "ff.h"
@@ -229,7 +230,7 @@ public:
     D.clear();
   }
 
-  void IncorporateIntoPlusLMForest(Candidate* item, State2Node* s2n, CandidateList* freelist) {
+  void IncorporateIntoPlusLMForest(size_t head_node_hash, Candidate* item, State2Node* s2n, CandidateList* freelist) {
     Hypergraph::Edge* new_edge = out.AddEdge(item->out_edge_);
     new_edge->edge_prob_ = item->out_edge_.edge_prob_;
     Candidate*& o_item = (*s2n)[item->state_];
@@ -238,6 +239,7 @@ public:
     int& node_id = o_item->node_index_;
     if (node_id < 0) {
       Hypergraph::Node* new_node = out.AddNode(in.nodes_[item->in_edge_->head_node_].cat_);
+      new_node->node_hash = cdec::HashNode(head_node_hash, item->state_); // ID is combination of existing state + residual state
       node_states_.push_back(item->state_);
       node_id = new_node->id_;
     }
@@ -287,7 +289,7 @@ public:
       cand.pop_back();
       // cerr << "POPPED: " << *item << endl;
       PushSucc(*item, is_goal, &cand, &unique_cands);
-      IncorporateIntoPlusLMForest(item, &state2node, &freelist);
+      IncorporateIntoPlusLMForest(v.node_hash, item, &state2node, &freelist);
       ++pops;
     }
     D_v.resize(state2node.size());
@@ -306,112 +308,112 @@ public:
   }
 
   void KBestFast(const int vert_index, const bool is_goal) {
-	  // cerr << "KBest(" << vert_index << ")\n";
-	  CandidateList& D_v = D[vert_index];
-	  assert(D_v.empty());
-	  const Hypergraph::Node& v = in.nodes_[vert_index];
-	  // cerr << " has " << v.in_edges_.size() << " in-coming edges\n";
-	  const vector<int>& in_edges = v.in_edges_;
-	  CandidateHeap cand;
-	  CandidateList freelist;
-	  cand.reserve(in_edges.size());
-	  //init with j<0,0> for all rules-edges that lead to node-(NT-span)
-	  for (int i = 0; i < in_edges.size(); ++i) {
-		  const Hypergraph::Edge& edge = in.edges_[in_edges[i]];
-		  const JVector j(edge.tail_nodes_.size(), 0);
-		  cand.push_back(new Candidate(edge, j, out, D, node_states_, smeta, models, is_goal));
-	  }
-	  // cerr << " making heap of " << cand.size() << " candidates\n";
-	  make_heap(cand.begin(), cand.end(), HeapCandCompare());
-	  State2Node state2node; // "buf" in Figure 2
-	  int pops = 0;
-	  while(!cand.empty() && pops < pop_limit_) {
-		  pop_heap(cand.begin(), cand.end(), HeapCandCompare());
-		  Candidate* item = cand.back();
-		  cand.pop_back();
-		  // cerr << "POPPED: " << *item << endl;
-
-		  PushSuccFast(*item, is_goal, &cand);
-		  IncorporateIntoPlusLMForest(item, &state2node, &freelist);
-		  ++pops;
-	  }
-	  D_v.resize(state2node.size());
-	  int c = 0;
-	  for (State2Node::iterator i = state2node.begin(); i != state2node.end(); ++i){
-		  D_v[c++] = i->second;
-		  // cerr << "MERGED: " << *i->second << endl;
-	  }
-	  //cerr <<"Node id: "<< vert_index<< endl;
-	  //#ifdef MEASURE_CA
-	  // cerr << "countInProcess (pop/tot): node id: " << vert_index << " (" << count_in_process_pop << "/" << count_in_process_tot << ")"<<endl;
-	  // cerr << "countAtEnd (pop/tot): node id: " << vert_index << " (" << count_at_end_pop << "/" << count_at_end_tot << ")"<<endl;
-	  //#endif
-	  sort(D_v.begin(), D_v.end(), EstProbSorter());
-
-	  // cerr << " expanded to " << D_v.size() << " nodes\n";
-
-	  for (int i = 0; i < cand.size(); ++i)
-		  delete cand[i];
-	  // freelist is necessary since even after an item merged, it still stays in
-	  // the unique set so it can't be deleted til now
-	  for (int i = 0; i < freelist.size(); ++i)
-		  delete freelist[i];
+    // cerr << "KBest(" << vert_index << ")\n";
+    CandidateList& D_v = D[vert_index];
+    assert(D_v.empty());
+    const Hypergraph::Node& v = in.nodes_[vert_index];
+    // cerr << " has " << v.in_edges_.size() << " in-coming edges\n";
+    const vector<int>& in_edges = v.in_edges_;
+    CandidateHeap cand;
+    CandidateList freelist;
+    cand.reserve(in_edges.size());
+    //init with j<0,0> for all rules-edges that lead to node-(NT-span)
+    for (int i = 0; i < in_edges.size(); ++i) {
+      const Hypergraph::Edge& edge = in.edges_[in_edges[i]];
+      const JVector j(edge.tail_nodes_.size(), 0);
+      cand.push_back(new Candidate(edge, j, out, D, node_states_, smeta, models, is_goal));
+    }
+    // cerr << " making heap of " << cand.size() << " candidates\n";
+    make_heap(cand.begin(), cand.end(), HeapCandCompare());
+    State2Node state2node; // "buf" in Figure 2
+    int pops = 0;
+    while(!cand.empty() && pops < pop_limit_) {
+      pop_heap(cand.begin(), cand.end(), HeapCandCompare());
+      Candidate* item = cand.back();
+      cand.pop_back();
+      // cerr << "POPPED: " << *item << endl;
+
+      PushSuccFast(*item, is_goal, &cand);
+      IncorporateIntoPlusLMForest(v.node_hash, item, &state2node, &freelist);
+      ++pops;
+    }
+    D_v.resize(state2node.size());
+    int c = 0;
+    for (auto& i : state2node) {
+      D_v[c++] = i.second;
+      // cerr << "MERGED: " << *i.second << endl;
+    }
+    //cerr <<"Node id: "<< vert_index<< endl;
+    //#ifdef MEASURE_CA
+    // cerr << "countInProcess (pop/tot): node id: " << vert_index << " (" << count_in_process_pop << "/" << count_in_process_tot << ")"<<endl;
+    // cerr << "countAtEnd (pop/tot): node id: " << vert_index << " (" << count_at_end_pop << "/" << count_at_end_tot << ")"<<endl;
+    //#endif
+    sort(D_v.begin(), D_v.end(), EstProbSorter());
+
+    // cerr << " expanded to " << D_v.size() << " nodes\n";
+
+    for (int i = 0; i < cand.size(); ++i)
+      delete cand[i];
+    // freelist is necessary since even after an item merged, it still stays in
+    // the unique set so it can't be deleted til now
+    for (int i = 0; i < freelist.size(); ++i)
+      delete freelist[i];
   }
 
   void KBestFast2(const int vert_index, const bool is_goal) {
-	  // cerr << "KBest(" << vert_index << ")\n";
-	  CandidateList& D_v = D[vert_index];
-	  assert(D_v.empty());
-	  const Hypergraph::Node& v = in.nodes_[vert_index];
-	  // cerr << " has " << v.in_edges_.size() << " in-coming edges\n";
-	  const vector<int>& in_edges = v.in_edges_;
-	  CandidateHeap cand;
-	  CandidateList freelist;
-	  cand.reserve(in_edges.size());
-	  UniqueCandidateSet unique_accepted;
-	  //init with j<0,0> for all rules-edges that lead to node-(NT-span)
-	  for (int i = 0; i < in_edges.size(); ++i) {
-		  const Hypergraph::Edge& edge = in.edges_[in_edges[i]];
-		  const JVector j(edge.tail_nodes_.size(), 0);
-		  cand.push_back(new Candidate(edge, j, out, D, node_states_, smeta, models, is_goal));
-	  }
-	  // cerr << " making heap of " << cand.size() << " candidates\n";
-	  make_heap(cand.begin(), cand.end(), HeapCandCompare());
-	  State2Node state2node; // "buf" in Figure 2
-	  int pops = 0;
-	  while(!cand.empty() && pops < pop_limit_) {
-		  pop_heap(cand.begin(), cand.end(), HeapCandCompare());
-		  Candidate* item = cand.back();
-		  cand.pop_back();
+    // cerr << "KBest(" << vert_index << ")\n";
+    CandidateList& D_v = D[vert_index];
+    assert(D_v.empty());
+    const Hypergraph::Node& v = in.nodes_[vert_index];
+    // cerr << " has " << v.in_edges_.size() << " in-coming edges\n";
+    const vector<int>& in_edges = v.in_edges_;
+    CandidateHeap cand;
+    CandidateList freelist;
+    cand.reserve(in_edges.size());
+    UniqueCandidateSet unique_accepted;
+    //init with j<0,0> for all rules-edges that lead to node-(NT-span)
+    for (int i = 0; i < in_edges.size(); ++i) {
+      const Hypergraph::Edge& edge = in.edges_[in_edges[i]];
+      const JVector j(edge.tail_nodes_.size(), 0);
+      cand.push_back(new Candidate(edge, j, out, D, node_states_, smeta, models, is_goal));
+    }
+    // cerr << " making heap of " << cand.size() << " candidates\n";
+    make_heap(cand.begin(), cand.end(), HeapCandCompare());
+    State2Node state2node; // "buf" in Figure 2
+    int pops = 0;
+    while(!cand.empty() && pops < pop_limit_) {
+      pop_heap(cand.begin(), cand.end(), HeapCandCompare());
+      Candidate* item = cand.back();
+      cand.pop_back();
                   bool is_new = unique_accepted.insert(item).second;
-		  assert(is_new); // these should all be unique!
-		  // cerr << "POPPED: " << *item << endl;
-
-		  PushSuccFast2(*item, is_goal, &cand, &unique_accepted);
-		  IncorporateIntoPlusLMForest(item, &state2node, &freelist);
-		  ++pops;
-	  }
-	  D_v.resize(state2node.size());
-	  int c = 0;
-	  for (State2Node::iterator i = state2node.begin(); i != state2node.end(); ++i){
-		  D_v[c++] = i->second;
-		  // cerr << "MERGED: " << *i->second << endl;
-	  }
-	  //cerr <<"Node id: "<< vert_index<< endl;
-	  //#ifdef MEASURE_CA
-	  // cerr << "countInProcess (pop/tot): node id: " << vert_index << " (" << count_in_process_pop << "/" << count_in_process_tot << ")"<<endl;
-	  // cerr << "countAtEnd (pop/tot): node id: " << vert_index << " (" << count_at_end_pop << "/" << count_at_end_tot << ")"<<endl;
-	  //#endif
-	  sort(D_v.begin(), D_v.end(), EstProbSorter());
-
-	  // cerr << " expanded to " << D_v.size() << " nodes\n";
-
-	  for (int i = 0; i < cand.size(); ++i)
-		  delete cand[i];
-	  // freelist is necessary since even after an item merged, it still stays in
-	  // the unique set so it can't be deleted til now
-	  for (int i = 0; i < freelist.size(); ++i)
-		  delete freelist[i];
+      assert(is_new); // these should all be unique!
+      // cerr << "POPPED: " << *item << endl;
+
+      PushSuccFast2(*item, is_goal, &cand, &unique_accepted);
+      IncorporateIntoPlusLMForest(v.node_hash, item, &state2node, &freelist);
+      ++pops;
+    }
+    D_v.resize(state2node.size());
+    int c = 0;
+    for (State2Node::iterator i = state2node.begin(); i != state2node.end(); ++i){
+      D_v[c++] = i->second;
+      // cerr << "MERGED: " << *i->second << endl;
+    }
+    //cerr <<"Node id: "<< vert_index<< endl;
+    //#ifdef MEASURE_CA
+    // cerr << "countInProcess (pop/tot): node id: " << vert_index << " (" << count_in_process_pop << "/" << count_in_process_tot << ")"<<endl;
+    // cerr << "countAtEnd (pop/tot): node id: " << vert_index << " (" << count_at_end_pop << "/" << count_at_end_tot << ")"<<endl;
+    //#endif
+    sort(D_v.begin(), D_v.end(), EstProbSorter());
+
+    // cerr << " expanded to " << D_v.size() << " nodes\n";
+
+    for (int i = 0; i < cand.size(); ++i)
+      delete cand[i];
+    // freelist is necessary since even after an item merged, it still stays in
+    // the unique set so it can't be deleted til now
+    for (int i = 0; i < freelist.size(); ++i)
+      delete freelist[i];
   }
 
   void PushSucc(const Candidate& item, const bool is_goal, CandidateHeap* pcand, UniqueCandidateSet* cs) {
@@ -434,50 +436,50 @@ public:
 
   //PushSucc following unique ancestor generation function
   void PushSuccFast(const Candidate& item, const bool is_goal, CandidateHeap* pcand){
-	  CandidateHeap& cand = *pcand;
-	  for (int i = 0; i < item.j_.size(); ++i) {
-		  JVector j = item.j_;
-		  ++j[i];
-		  if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) {
-			  Candidate* new_cand = new Candidate(*item.in_edge_, j, out, D, node_states_, smeta, models, is_goal);
-			  cand.push_back(new_cand);
-			  push_heap(cand.begin(), cand.end(), HeapCandCompare());
-		  }
-		  if(item.j_[i]!=0){
-			  return;
-		  }
-	  }
+    CandidateHeap& cand = *pcand;
+    for (int i = 0; i < item.j_.size(); ++i) {
+      JVector j = item.j_;
+      ++j[i];
+      if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) {
+        Candidate* new_cand = new Candidate(*item.in_edge_, j, out, D, node_states_, smeta, models, is_goal);
+        cand.push_back(new_cand);
+        push_heap(cand.begin(), cand.end(), HeapCandCompare());
+      }
+      if(item.j_[i]!=0){
+        return;
+      }
+    }
   }
 
   //PushSucc only if all ancest Cand are added
   void PushSuccFast2(const Candidate& item, const bool is_goal, CandidateHeap* pcand, UniqueCandidateSet* ps){
-	  CandidateHeap& cand = *pcand;
-	  for (int i = 0; i < item.j_.size(); ++i) {
-		  JVector j = item.j_;
-		  ++j[i];
-		  if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) {
-			  Candidate query_unique(*item.in_edge_, j);
-			  if (HasAllAncestors(&query_unique,ps)) {
-				  Candidate* new_cand = new Candidate(*item.in_edge_, j, out, D, node_states_, smeta, models, is_goal);
-				  cand.push_back(new_cand);
-				  push_heap(cand.begin(), cand.end(), HeapCandCompare());
-			  }
-		  }
-	  }
+    CandidateHeap& cand = *pcand;
+    for (int i = 0; i < item.j_.size(); ++i) {
+      JVector j = item.j_;
+      ++j[i];
+      if (j[i] < D[item.in_edge_->tail_nodes_[i]].size()) {
+        Candidate query_unique(*item.in_edge_, j);
+        if (HasAllAncestors(&query_unique,ps)) {
+          Candidate* new_cand = new Candidate(*item.in_edge_, j, out, D, node_states_, smeta, models, is_goal);
+          cand.push_back(new_cand);
+          push_heap(cand.begin(), cand.end(), HeapCandCompare());
+        }
+      }
+    }
   }
 
   bool HasAllAncestors(const Candidate* item, UniqueCandidateSet* cs){
-	  for (int i = 0; i < item->j_.size(); ++i) {
-		  JVector j = item->j_;
-		  --j[i];
-		  if (j[i] >=0) {
-			  Candidate query_unique(*item->in_edge_, j);
-			  if (cs->count(&query_unique) == 0) {
-				  return false;
-			  }
-		  }
-	  }
-	  return true;
+    for (int i = 0; i < item->j_.size(); ++i) {
+      JVector j = item->j_;
+      --j[i];
+      if (j[i] >=0) {
+        Candidate query_unique(*item->in_edge_, j);
+        if (cs->count(&query_unique) == 0) {
+          return false;
+        }
+      }
+    }
+    return true;
   }
 
   const ModelSet& models;
@@ -491,7 +493,7 @@ public:
   FFStates node_states_;  // for each node in the out-HG what is
                              // its q function value?
   const int pop_limit_;
- const int strategy_;       //switch Cube Pruning strategy: 1 normal, 2 fast (alg 2), 3 fast_2 (alg 3). (see: Gesmundo A., Henderson J,. Faster Cube Pruning, IWSLT 2010)
+  const int strategy_;       //switch Cube Pruning strategy: 1 normal, 2 fast (alg 2), 3 fast_2 (alg 3). (see: Gesmundo A., Henderson J,. Faster Cube Pruning, IWSLT 2010)
 };
 
 struct NoPruningRescorer {
@@ -507,7 +509,7 @@ struct NoPruningRescorer {
 
   typedef unordered_map<FFState, int, boost::hash<FFState> > State2NodeIndex;
 
-  void ExpandEdge(const Hypergraph::Edge& in_edge, bool is_goal, State2NodeIndex* state2node) {
+  void ExpandEdge(const Hypergraph::Edge& in_edge, bool is_goal, size_t head_node_hash, State2NodeIndex* state2node) {
     const int arity = in_edge.Arity();
     Hypergraph::TailNodeVector ends(arity);
     for (int i = 0; i < arity; ++i)
@@ -531,7 +533,9 @@ struct NoPruningRescorer {
       }
       int& head_plus1 = (*state2node)[head_state];
       if (!head_plus1) {
-        head_plus1 = out.AddNode(in_edge.rule_->GetLHS())->id_ + 1;
+        HG::Node* new_node = out.AddNode(in_edge.rule_->GetLHS());
+        new_node->node_hash = cdec::HashNode(head_node_hash, head_state); // ID is combination of existing state + residual state
+        head_plus1 = new_node->id_ + 1;
         node_states_.push_back(head_state);
         nodemap[in_edge.head_node_].push_back(head_plus1 - 1);
       }
@@ -553,7 +557,7 @@ struct NoPruningRescorer {
     const Hypergraph::Node& node = in.nodes_[node_num];
     for (int i = 0; i < node.in_edges_.size(); ++i) {
       const Hypergraph::Edge& edge = in.edges_[node.in_edges_[i]];
-      ExpandEdge(edge, is_goal, &state2node);
+      ExpandEdge(edge, is_goal, node.node_hash, &state2node);
     }
   }
 
@@ -605,16 +609,16 @@ void ApplyModelSet(const Hypergraph& in,
       cerr << "  Note: reducing pop_limit to " << pl << " for very large forest\n";
     }
     if      (config.algorithm == IntersectionConfiguration::CUBE) {
-    	CubePruningRescorer ma(models, smeta, in, pl, out);
-        ma.Apply();
+      CubePruningRescorer ma(models, smeta, in, pl, out);
+      ma.Apply();
     }
     else if (config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING){
-    	CubePruningRescorer ma(models, smeta, in, pl, out, FAST_CP);
-        ma.Apply();
+      CubePruningRescorer ma(models, smeta, in, pl, out, FAST_CP);
+      ma.Apply();
     }
     else if (config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING_2){
-    	CubePruningRescorer ma(models, smeta, in, pl, out, FAST_CP_2);
-        ma.Apply();
+      CubePruningRescorer ma(models, smeta, in, pl, out, FAST_CP_2);
+      ma.Apply();
     }
 
   } else {
diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc
index ff4c7a90..b30f1ec6 100644
--- a/decoder/bottom_up_parser.cc
+++ b/decoder/bottom_up_parser.cc
@@ -7,6 +7,8 @@
 #include <iostream>
 #include <map>
 
+#include "node_state_hash.h"
+#include "nt_span.h"
 #include "hg.h"
 #include "array2d.h"
 #include "tdict.h"
@@ -356,5 +358,13 @@ bool ExhaustiveBottomUpParser::Parse(const Lattice& input,
   kEPS = TD::Convert("*EPS*");
   PassiveChart chart(goal_sym_, grammars_, input, forest);
   const bool result = chart.Parse();
+
+  if (result) {
+    for (auto& node : forest->nodes_) {
+      Span prev;
+      const Span s = forest->NodeSpan(node.id_, &prev);
+      node.node_hash = cdec::HashNode(node.cat_, s.l, s.r, prev.l, prev.r);
+    }
+  }
   return result;
 }
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 43e2640d..354ea2d9 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -750,6 +750,11 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
     return false;
   }
 
+  // this is mainly used for debugging, eventually this will be an assertion
+  if (!forest.AreNodesUniquelyIdentified()) {
+    if (!SILENT) cerr << "  *** NODES NOT UNIQUELY IDENTIFIED ***\n";
+  }
+
   const bool show_tree_structure=conf.count("show_tree_structure");
   if (!SILENT) forest_stats(forest,"  Init. forest",show_tree_structure,oracle.show_derivation);
   if (conf.count("show_expected_length")) {
@@ -813,6 +818,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
       forest.swap(rescored_forest);
       forest.Reweight(cur_weights);
       if (!SILENT) forest_stats(forest,"  " + passtr +" forest",show_tree_structure,oracle.show_derivation, conf.count("extract_rules"), extract_file);
+      // this is mainly used for debugging, eventually this will be an assertion
+      if (!forest.AreNodesUniquelyIdentified()) {
+        if (!SILENT) cerr << "  *** NODES NOT UNIQUELY IDENTIFIED ***\n";
+      }
     }
 
     if (conf.count("show_partition")) {
@@ -984,6 +993,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
             forest.edges_[i].rule_ = forest.edges_[i].rule_->parent_rule_;
       }
       forest.Reweight(last_weights);
+      // this is mainly used for debugging, eventually this will be an assertion
+      if (!forest.AreNodesUniquelyIdentified()) {
+        if (!SILENT) cerr << "  *** NODES NOT UNIQUELY IDENTIFIED ***\n";
+      }
       if (!SILENT) forest_stats(forest,"  Constr. forest",show_tree_structure,oracle.show_derivation);
       if (!SILENT) cerr << "  Constr. VitTree: " << ViterbiFTree(forest) << endl;
       if (conf.count("show_partition")) {
diff --git a/decoder/fst_translator.cc b/decoder/fst_translator.cc
index 074de4c9..4253b652 100644
--- a/decoder/fst_translator.cc
+++ b/decoder/fst_translator.cc
@@ -67,6 +67,12 @@ struct FSTTranslatorImpl {
       Hypergraph::Edge* hg_edge = forest->AddEdge(kGOAL_RULE, tail);
       forest->ConnectEdgeToHeadNode(hg_edge, goal);
       forest->Reweight(weights);
+
+      // since we don't do any pruning, the node_hash will be the same for
+      // every run of the composer
+      int nc = 0;
+      for (auto& node : forest->nodes_)
+        node.node_hash = ++nc;
     }
     if (add_pass_through_rules)
       fst->ClearPassThroughTranslations();
diff --git a/decoder/hg.cc b/decoder/hg.cc
index 7240a8ab..405169c6 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -1,14 +1,17 @@
-//TODO: lazily generate feature vectors for hyperarcs (because some of them will be pruned).  this means 1) storing ref to rule for those features 2) providing ff interface for regenerating its feature vector from hyperedge+states and probably 3) still caching feat. vect on hyperedge once it's been generated.  ff would normally just contribute its weighted score and result state, not component features.  however, the hypergraph drops the state used by ffs after rescoring is done, so recomputation would have to start at the leaves and work bottom up.  question: which takes more space, feature id+value, or state?
-
 #include "hg.h"
 
 #include <algorithm>
 #include <cassert>
 #include <numeric>
-#include <set>
 #include <map>
 #include <iostream>
 #include <sstream>
+#ifndef HAVE_OLD_CPP
+# include <unordered_set>
+#else
+# include <tr1/unordered_set>
+namespace std { using std::tr1::unordered_set; }
+#endif
 
 #include "viterbi.h"
 #include "inside_outside.h"
@@ -17,28 +20,21 @@
 
 using namespace std;
 
-#if 0
-Hypergraph::Edge const* Hypergraph::ViterbiGoalEdge() const
-{
-  Edge const* r=0;
-  for (unsigned i=0,e=edges_.size();i<e;++i) {
-    Edge const& e=edges_[i];
-    if (e.rule_ && e.rule_->IsGoal() && (!r || e.edge_prob_ > r->edge_prob_))
-      r=&e;
-  }
-  return r;
+bool Hypergraph::AreNodesUniquelyIdentified() const {
+  unordered_set<size_t> s(nodes_.size() * 3 + 7);
+  for (const auto& n : nodes_)
+    if (!s.insert(n.node_hash).second)
+      return false;
+  return true;
 }
-#endif
 
-Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges()
-{
+Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges() {
   NodeProbs nv;
   ComputeNodeViterbi(&nv);
   return SortInEdgesByNodeViterbi(nv);
 }
 
-Hypergraph::Edge const* Hypergraph::SortInEdgesByNodeViterbi(NodeProbs const& nv)
-{
+Hypergraph::Edge const* Hypergraph::SortInEdgesByNodeViterbi(NodeProbs const& nv) {
   EdgeProbs ev;
   ComputeEdgeViterbi(nv,&ev);
   return ViterbiSortInEdges(ev);
@@ -375,9 +371,7 @@ bool Hypergraph::PruneInsideOutside(double alpha,double density,const EdgeMask*
 void Hypergraph::PrintGraphviz() const {
   int ei = 0;
   cerr << "digraph G {\n  rankdir=LR;\n  nodesep=.05;\n";
-  for (vector<Edge>::const_iterator i = edges_.begin();
-       i != edges_.end(); ++i) {
-    const Edge& edge=*i;
+  for (const auto& edge : edges_) {
     ++ei;
     static const string none = "<null>";
     string rule = (edge.rule_ ? edge.rule_->AsString(false) : none);
@@ -399,14 +393,9 @@ void Hypergraph::PrintGraphviz() const {
     }
     cerr << "     A_" << ei << " -> " << edge.head_node_ << ";\n";
   }
-  for (vector<Node>::const_iterator ni = nodes_.begin();
-      ni != nodes_.end(); ++ni) {
-    cerr << "  " << ni->id_ << "[label=\"" << (ni->cat_ < 0 ? TD::Convert(ni->cat_ * -1) : "")
-    //cerr << "  " << ni->id_ << "[label=\"" << ni->cat_
-         << " n=" << ni->id_
-//         << ",x=" << &*ni
-//         << ",in=" << ni->in_edges_.size()
-//         << ",out=" << ni->out_edges_.size()
+  for (const auto& node : nodes_) {
+    cerr << "  " << node.id_ << "[label=\"" << (node.cat_ < 0 ? TD::Convert(node.cat_ * -1) : "")
+         << " n=" << node.id_
          << "\"];\n";
   }
   cerr << "}\n";
diff --git a/decoder/hg.h b/decoder/hg.h
index 343b99cf..43fb275b 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -142,13 +142,15 @@ namespace HG {
   // TODO get rid of cat_?
   // TODO keep cat_ and add span and/or state? :)
   struct Node {
-    Node() : id_(), cat_() {}
+    Node() : node_hash(), id_(), cat_() {}
+    size_t node_hash;  // hash of all the information that makes this node unique
     int id_; // equal to this object's position in the nodes_ vector
     WordID cat_;  // non-terminal category if <0, 0 if not set
     WordID NT() const { return -cat_; }
     EdgesVector in_edges_;   // an in edge is an edge with this node as its head.  (in edges come from the bottom up to us)  indices in edges_
     EdgesVector out_edges_;  // an out edge is an edge with this node as its tail.  (out edges leave us up toward the top/goal). indices in edges_
     void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting
+      node_hash = o.node_hash;
       cat_=o.cat_;
     }
     void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) {
@@ -192,13 +194,14 @@ public:
     SetNodeOrigin(nodeid,r);
     return r;
   }
-  Span NodeSpan(int nodeid) const {
+  Span NodeSpan(int nodeid, Span* prev = nullptr) const {
     Span s;
     Node const &n=nodes_[nodeid];
     if (!n.in_edges_.empty()) {
       Edge const& e=edges_[n.in_edges_.front()];
       s.l=e.i_;
       s.r=e.j_;
+      if (prev) { prev->l = e.prev_i_; prev->r = e.prev_j_; }
     }
     return s;
   }
@@ -262,6 +265,9 @@ public:
     for (int i = 0; i < size; ++i) nodes_[i].id_ = i;
   }
 
+  // if all node states are unique, return true
+  bool AreNodesUniquelyIdentified() const;
+
   // reserves space in the nodes vector to prevent memory locations
   // from changing
   void ReserveNodes(size_t n, size_t e = 0) {
diff --git a/decoder/lexalign.cc b/decoder/lexalign.cc
index 6adb1892..11f20de7 100644
--- a/decoder/lexalign.cc
+++ b/decoder/lexalign.cc
@@ -124,6 +124,11 @@ bool LexicalAlign::TranslateImpl(const string& input,
   pimpl_->BuildTrellis(lattice, *smeta, forest);
   forest->is_linear_chain_ = true;
   forest->Reweight(weights);
+  // since we don't do any pruning, the node_hash will be the same for
+  // every run of the composer
+  int nc = 0;
+  for (auto& node : forest->nodes_)
+    node.node_hash = ++nc;
   return true;
 }
 
diff --git a/decoder/lextrans.cc b/decoder/lextrans.cc
index 8c3269bf..74a18c3f 100644
--- a/decoder/lextrans.cc
+++ b/decoder/lextrans.cc
@@ -280,6 +280,11 @@ bool LexicalTrans::TranslateImpl(const string& input,
   smeta->SetSourceLength(lattice.size());
   if (!pimpl_->BuildTrellis(lattice, *smeta, forest)) return false;
   forest->Reweight(weights);
+  // since we don't do any pruning, the node_hash will be the same for
+  // every run of the composer
+  int nc = 0;
+  for (auto& node : forest->nodes_)
+    node.node_hash = ++nc;
   return true;
 }
 
diff --git a/decoder/node_state_hash.h b/decoder/node_state_hash.h
new file mode 100644
index 00000000..cdc05877
--- /dev/null
+++ b/decoder/node_state_hash.h
@@ -0,0 +1,36 @@
+#ifndef _NODE_STATE_HASH_
+#define _NODE_STATE_HASH_
+
+#include <cassert>
+#include <cstring>
+#include "murmur_hash3.h"
+#include "ffset.h"
+
+namespace cdec {
+
+  struct FirstPassNode {
+    FirstPassNode(int cat, int i, int j, int pi, int pj) : lhs(cat), s(i), t(j), u(pi), v(pj) {}
+    int32_t lhs;
+    short s;
+    short t;
+    short u;
+    short v;
+  };
+
+  inline uint64_t HashNode(int cat, int i, int j, int pi, int pj) {
+    FirstPassNode fpn(cat, i, j, pi, pj);
+    return MurmurHash3_64(&fpn, sizeof(FirstPassNode), 2654435769U);
+  }
+
+  inline uint64_t HashNode(uint64_t old_hash, const FFState& state) {
+    uint8_t buf[1024];
+    std::memcpy(buf, &old_hash, sizeof(uint64_t));
+    assert(state.size() < (1024u - sizeof(uint64_t)));
+    std::memcpy(&buf[sizeof(uint64_t)], state.begin(), state.size());
+    return MurmurHash3_64(buf, sizeof(uint64_t) + state.size(), 2654435769U);
+  }
+
+}
+
+#endif
+
diff --git a/decoder/nt_span.h b/decoder/nt_span.h
index a918f301..6ff9391f 100644
--- a/decoder/nt_span.h
+++ b/decoder/nt_span.h
@@ -7,7 +7,7 @@
 
 struct Span {
   int l,r;
-  Span() : l(-1) {  }
+  Span() : l(-1), r(-1) {  }
   bool is_null() const { return l<0; }
   void print(std::ostream &o,char const* for_null="") const {
     if (is_null())
diff --git a/decoder/tagger.cc b/decoder/tagger.cc
index 63e855c8..30fb055f 100644
--- a/decoder/tagger.cc
+++ b/decoder/tagger.cc
@@ -108,6 +108,11 @@ bool Tagger::TranslateImpl(const string& input,
   pimpl_->BuildTrellis(sequence, forest);
   forest->Reweight(weights);
   forest->is_linear_chain_ = true;
+  // since we don't do any pruning, the node_hash will be the same for
+  // every run of the composer
+  int nc = 0;
+  for (auto& node : forest->nodes_)
+    node.node_hash = ++nc;
   return true;
 }
 
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc
index f288ab4e..8d12d01d 100644
--- a/decoder/tree2string_translator.cc
+++ b/decoder/tree2string_translator.cc
@@ -184,13 +184,19 @@ struct Tree2StringTranslatorImpl {
         //   TD::Convert(input_tree.nodes[s.input_node_idx].lhs & cdec::ALL_MASK) << endl;
         if (s.node->rules.size()) {
           int& node_id = tree2hg[s.input_node_idx];
-          if (node_id < 0)
-            node_id = hg.AddNode(-(input_tree.nodes[s.input_node_idx].lhs & cdec::ALL_MASK))->id_;
+          if (node_id < 0) {
+            HG::Node* new_node = hg.AddNode(-(input_tree.nodes[s.input_node_idx].lhs & cdec::ALL_MASK));
+            new_node->node_hash = s.input_node_idx + 1;
+            node_id = new_node->id_;
+          }
           TailNodeVector tail;
           for (auto n : s.future_work) {
             int& nix = tree2hg[n];
-            if (nix < 0)
-              nix = hg.AddNode(-(input_tree.nodes[n].lhs & cdec::ALL_MASK))->id_;
+            if (nix < 0) {
+              HG::Node* new_node = hg.AddNode(-(input_tree.nodes[n].lhs & cdec::ALL_MASK));
+              new_node->node_hash = n + 1;
+              nix = new_node->id_;
+            }
             tail.push_back(nix);
           }
           for (auto& r : s.node->rules) {
diff --git a/mteval/Makefile.am b/mteval/Makefile.am
index 681e798e..08591c9a 100644
--- a/mteval/Makefile.am
+++ b/mteval/Makefile.am
@@ -1,6 +1,7 @@
 bin_PROGRAMS = \
   fast_score \
-  mbr_kbest
+  mbr_kbest\
+  marginalize
 
 noinst_PROGRAMS = \
   scorer_test
@@ -46,4 +47,7 @@ mbr_kbest_LDADD = libmteval.a ../utils/libutils.a
 scorer_test_SOURCES = scorer_test.cc
 scorer_test_LDADD = libmteval.a ../utils/libutils.a $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS)
 
-AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/mteval/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/utils
+marginalize_SOURCES = marginalize.cc
+marginalize_LDADD = libmteval.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a ../utils/libutils.a
+
+AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/mteval/test_data\" -DBOOST_TEST_DYN_LINK -W -Wall -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/utils -I$(top_srcdir)/klm
diff --git a/tests/tools/filter-stderr.pl b/tests/tools/filter-stderr.pl
index 4a762324..54fe9210 100755
--- a/tests/tools/filter-stderr.pl
+++ b/tests/tools/filter-stderr.pl
@@ -13,6 +13,7 @@ if (/Init.*\s+Viterbi:\s+($REAL)/) {
   # -LM Viterbi: australia is have diplomatic relations with north korea one of the few countries .
   print "-lm_trans $1\n";
 }
+if (/NODES NOT UNIQUELY IDENTIFIED/) { print "NODES_NOT_UNIQUE 1\n"; }
 #Constr. forest (nodes/edges): 111/305
 #Constr. forest       (paths): 9899
 if (/Constr\. forest\s+\(nodes\/edges\): (\d+)\/(\d+)/) { print "constr_nodes $1\nconstr_edges $2\n"; }
diff --git a/utils/Makefile.am b/utils/Makefile.am
index c0ce3509..341fd80b 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -39,7 +39,8 @@ libutils_a_SOURCES = \
   kernel_string_subseq.h \
   logval.h \
   m.h \
-  murmur_hash.h \
+  murmur_hash3.h \
+  murmur_hash3.cc \
   named_enum.h \
   null_deleter.h \
   null_traits.h \
diff --git a/utils/hash.h b/utils/hash.h
index e1426ffb..24d2b6ad 100644
--- a/utils/hash.h
+++ b/utils/hash.h
@@ -3,7 +3,7 @@
 
 #include <boost/functional/hash.hpp>
 
-#include "murmur_hash.h"
+#include "murmur_hash3.h"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -44,23 +44,21 @@ const unsigned GOLDEN_MEAN_FRACTION=2654435769U;
 
 // assumes C is POD
 template <class C>
-struct murmur_hash
-{
-  typedef MurmurInt result_type;
+struct murmur_hash {
+  typedef size_t result_type;
   typedef C /*const&*/ argument_type;
   result_type operator()(argument_type const& c) const {
-    return MurmurHash((void*)&c,sizeof(c));
+    return cdec::MurmurHash3_64((void*)&c, sizeof(c), GOLDEN_MEAN_FRACTION);
   }
 };
 
 // murmur_hash_array isn't std guaranteed safe (you need to use string::data())
 template <>
-struct murmur_hash<std::string>
-{
-  typedef MurmurInt result_type;
+struct murmur_hash<std::string> {
+  typedef size_t result_type;
   typedef std::string /*const&*/ argument_type;
   result_type operator()(argument_type const& c) const {
-    return MurmurHash(c.data(),c.size());
+    return cdec::MurmurHash3_64(c.data(), c.size(), GOLDEN_MEAN_FRACTION);
   }
 };
 
@@ -68,10 +66,10 @@ struct murmur_hash<std::string>
 template <class C>
 struct murmur_hash_array
 {
-  typedef MurmurInt result_type;
+  typedef size_t result_type;
   typedef C /*const&*/ argument_type;
   result_type operator()(argument_type const& c) const {
-    return MurmurHash(&*c.begin(),c.size()*sizeof(*c.begin()));
+    return cdec::MurmurHash3_64(&*c.begin(), c.size()*sizeof(*c.begin()), GOLDEN_MEAN_FRACTION);
   }
 };
 
@@ -95,7 +93,6 @@ typename H::mapped_type & get_or_construct(H &ht,K const& k,C0 const& c0) {
   }
 }
 
-
 // get_or_call (0 arg)
 template <class H,class K,class F>
 typename H::mapped_type & get_or_call(H &ht,K const& k,F const& f) {
diff --git a/utils/murmur_hash.h b/utils/murmur_hash.h
deleted file mode 100644
index 6063d524..00000000
--- a/utils/murmur_hash.h
+++ /dev/null
@@ -1,186 +0,0 @@
-#ifndef _MURMUR_HASH_H_
-#define _MURMUR_HASH_H_
-
-//NOTE: quite fast, nice collision properties, but endian dependent hash values
-
-#include "have_64_bits.h"
-typedef uintptr_t MurmurInt;
-
-// MurmurHash2, by Austin Appleby
-
-static const uint32_t DEFAULT_SEED=2654435769U;
-
-#if HAVE_64_BITS
-//MurmurInt MurmurHash(void const *key, int len, uint32_t seed=DEFAULT_SEED);
-
-inline uint64_t MurmurHash64( const void * key, int len, unsigned int seed=DEFAULT_SEED )
-{
-  const uint64_t m = 0xc6a4a7935bd1e995ULL;
-  const int r = 47;
-
-  uint64_t h = seed ^ (len * m);
-
-  const uint64_t * data = (const uint64_t *)key;
-  const uint64_t * end = data + (len/8);
-
-  while(data != end)
-  {
-    uint64_t k = *data++;
-
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-
-    h ^= k;
-    h *= m;
-  }
-
-  const unsigned char * data2 = (const unsigned char*)data;
-
-  switch(len & 7)
-  {
-  case 7: h ^= uint64_t(data2[6]) << 48;
-  case 6: h ^= uint64_t(data2[5]) << 40;
-  case 5: h ^= uint64_t(data2[4]) << 32;
-  case 4: h ^= uint64_t(data2[3]) << 24;
-  case 3: h ^= uint64_t(data2[2]) << 16;
-  case 2: h ^= uint64_t(data2[1]) << 8;
-  case 1: h ^= uint64_t(data2[0]);
-    h *= m;
-  };
-
-  h ^= h >> r;
-  h *= m;
-  h ^= h >> r;
-
-  return h;
-}
-
-inline uint32_t MurmurHash32(void const *key, int len, uint32_t seed=DEFAULT_SEED)
-{
-  return (uint32_t) MurmurHash64(key,len,seed);
-}
-
-inline MurmurInt MurmurHash(void const *key, int len, uint32_t seed=DEFAULT_SEED)
-{
-  return MurmurHash64(key,len,seed);
-}
-
-#else
-// 32-bit
-
-// Note - This code makes a few assumptions about how your machine behaves -
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4
-inline uint32_t MurmurHash32 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)
-{
-  // 'm' and 'r' are mixing constants generated offline.
-  // They're not really 'magic', they just happen to work well.
-
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  // Initialize the hash to a 'random' value
-
-  uint32_t h = seed ^ len;
-
-  // Mix 4 bytes at a time into the hash
-
-  const unsigned char * data = (const unsigned char *)key;
-
-  while(len >= 4)
-  {
-    uint32_t k = *(uint32_t *)data;
-
-    k *= m;
-    k ^= k >> r;
-    k *= m;
-
-    h *= m;
-    h ^= k;
-
-    data += 4;
-    len -= 4;
-  }
-
-  // Handle the last few bytes of the input array
-
-  switch(len)
-  {
-  case 3: h ^= data[2] << 16;
-  case 2: h ^= data[1] << 8;
-  case 1: h ^= data[0];
-    h *= m;
-  };
-
-  // Do a few final mixes of the hash to ensure the last few
-  // bytes are well-incorporated.
-
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-
-  return h;
-}
-
-inline MurmurInt MurmurHash ( const void * key, int len, uint32_t seed=DEFAULT_SEED) {
-  return MurmurHash32(key,len,seed);
-}
-
-// 64-bit hash for 32-bit platforms
-
-inline uint64_t MurmurHash64 ( const void * key, int len, uint32_t seed=DEFAULT_SEED)
-{
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  uint32_t h1 = seed ^ len;
-  uint32_t h2 = 0;
-
-  const uint32_t * data = (const uint32_t *)key;
-
-  while(len >= 8)
-  {
-    uint32_t k1 = *data++;
-    k1 *= m; k1 ^= k1 >> r; k1 *= m;
-    h1 *= m; h1 ^= k1;
-    len -= 4;
-
-    uint32_t k2 = *data++;
-    k2 *= m; k2 ^= k2 >> r; k2 *= m;
-    h2 *= m; h2 ^= k2;
-    len -= 4;
-  }
-
-  if(len >= 4)
-  {
-    uint32_t k1 = *data++;
-    k1 *= m; k1 ^= k1 >> r; k1 *= m;
-    h1 *= m; h1 ^= k1;
-    len -= 4;
-  }
-
-  switch(len)
-  {
-  case 3: h2 ^= ((unsigned char*)data)[2] << 16;
-  case 2: h2 ^= ((unsigned char*)data)[1] << 8;
-  case 1: h2 ^= ((unsigned char*)data)[0];
-    h2 *= m;
-  };
-
-  h1 ^= h2 >> 18; h1 *= m;
-  h2 ^= h1 >> 22; h2 *= m;
-  h1 ^= h2 >> 17; h1 *= m;
-  h2 ^= h1 >> 19; h2 *= m;
-
-  uint64_t h = h1;
-
-  h = (h << 32) | h2;
-
-  return h;
-}
-
-#endif
-//32bit
-
-#endif
diff --git a/utils/murmur_hash3.cc b/utils/murmur_hash3.cc
new file mode 100644
index 00000000..68a71d02
--- /dev/null
+++ b/utils/murmur_hash3.cc
@@ -0,0 +1,340 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "murmur_hash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE    __forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y)     _rotl(x,y)
+#define ROTL64(x,y)     _rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else   // defined(_MSC_VER)
+
+#define FORCE_INLINE inline __attribute__((always_inline))
+
+namespace cdec {
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL32(x,y)     rotl32(x,y)
+#define ROTL64(x,y)     rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
+{
+  return p[i];
+}
+
+FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
+{
+  return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix32 ( uint32_t h )
+{
+  h ^= h >> 16;
+  h *= 0x85ebca6b;
+  h ^= h >> 13;
+  h *= 0xc2b2ae35;
+  h ^= h >> 16;
+
+  return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix64 ( uint64_t k )
+{
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k ^= k >> 33;
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k ^= k >> 33;
+
+  return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = seed;
+
+  const uint32_t c1 = 0xcc9e2d51;
+  const uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock32(blocks,i);
+
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+    
+    h1 ^= k1;
+    h1 = ROTL32(h1,13); 
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  switch(len & 3)
+  {
+  case 3: k1 ^= tail[2] << 16;
+  case 2: k1 ^= tail[1] << 8;
+  case 1: k1 ^= tail[0];
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+
+  h1 = fmix32(h1);
+
+  *(uint32_t*)out = h1;
+} 
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint32_t h1 = seed;
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  const uint32_t c1 = 0x239b961b; 
+  const uint32_t c2 = 0xab0e9789;
+  const uint32_t c3 = 0x38b34ae5; 
+  const uint32_t c4 = 0xa1e38b93;
+
+  //----------
+  // body
+
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+  for(int i = -nblocks; i; i++)
+  {
+    uint32_t k1 = getblock32(blocks,i*4+0);
+    uint32_t k2 = getblock32(blocks,i*4+1);
+    uint32_t k3 = getblock32(blocks,i*4+2);
+    uint32_t k4 = getblock32(blocks,i*4+3);
+
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k4 ^= tail[14] << 16;
+  case 14: k4 ^= tail[13] << 8;
+  case 13: k4 ^= tail[12] << 0;
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+  case 12: k3 ^= tail[11] << 24;
+  case 11: k3 ^= tail[10] << 16;
+  case 10: k3 ^= tail[ 9] << 8;
+  case  9: k3 ^= tail[ 8] << 0;
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+  case  8: k2 ^= tail[ 7] << 24;
+  case  7: k2 ^= tail[ 6] << 16;
+  case  6: k2 ^= tail[ 5] << 8;
+  case  5: k2 ^= tail[ 4] << 0;
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+  case  4: k1 ^= tail[ 3] << 24;
+  case  3: k1 ^= tail[ 2] << 16;
+  case  2: k1 ^= tail[ 1] << 8;
+  case  1: k1 ^= tail[ 0] << 0;
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  h1 = fmix32(h1);
+  h2 = fmix32(h2);
+  h3 = fmix32(h3);
+  h4 = fmix32(h4);
+
+  h1 += h2; h1 += h3; h1 += h4;
+  h2 += h1; h3 += h1; h4 += h1;
+
+  ((uint32_t*)out)[0] = h1;
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+
+  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body
+
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(int i = 0; i < nblocks; i++)
+  {
+    uint64_t k1 = getblock64(blocks,i*2+0);
+    uint64_t k2 = getblock64(blocks,i*2+1);
+
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  switch(len & 15)
+  {
+  case 15: k2 ^= ((uint64_t)tail[14]) << 48;
+  case 14: k2 ^= ((uint64_t)tail[13]) << 40;
+  case 13: k2 ^= ((uint64_t)tail[12]) << 32;
+  case 12: k2 ^= ((uint64_t)tail[11]) << 24;
+  case 11: k2 ^= ((uint64_t)tail[10]) << 16;
+  case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
+  case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+  case  8: k1 ^= ((uint64_t)tail[ 7]) << 56;
+  case  7: k1 ^= ((uint64_t)tail[ 6]) << 48;
+  case  6: k1 ^= ((uint64_t)tail[ 5]) << 40;
+  case  5: k1 ^= ((uint64_t)tail[ 4]) << 32;
+  case  4: k1 ^= ((uint64_t)tail[ 3]) << 24;
+  case  3: k1 ^= ((uint64_t)tail[ 2]) << 16;
+  case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;
+  case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len; h2 ^= len;
+
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix64(h1);
+  h2 = fmix64(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace cdec
+
+
diff --git a/utils/murmur_hash3.h b/utils/murmur_hash3.h
new file mode 100644
index 00000000..a125d775
--- /dev/null
+++ b/utils/murmur_hash3.h
@@ -0,0 +1,67 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else   // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+namespace cdec {
+
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+namespace {
+  #ifdef __clang__
+  #pragma clang diagnostic push
+  #pragma clang diagnostic ignored "-Wunused-function"
+  #endif
+  template <unsigned L> inline void cdecMurmurHashNativeBackend(const void * key, int len, uint32_t seed, void * out) {
+    MurmurHash3_x86_128(key, len, seed, out);
+  }
+  template <> inline void cdecMurmurHashNativeBackend<4>(const void * key, int len, uint32_t seed, void * out) {
+    MurmurHash3_x64_128(key, len, seed, out);
+  }
+  #ifdef __clang__
+  #pragma clang diagnostic pop
+  #endif
+} // namespace
+
+inline uint64_t MurmurHash3_64(const void * key, int len, uint32_t seed) {
+  uint64_t out[2];
+  cdecMurmurHashNativeBackend<sizeof(void*)>(key, len, seed, &out);
+  return out[0];
+}
+
+inline void MurmurHash3_128(const void * key, int len, uint32_t seed, void * out) {
+  cdecMurmurHashNativeBackend<sizeof(void*)>(key, len, seed, out);
+}
+
+}
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
-- 
cgit v1.2.3


From 926ed91f7364150eac0867097d80457999a8132f Mon Sep 17 00:00:00 2001
From: Chris Dyer <redpony@gmail.com>
Date: Thu, 15 May 2014 00:45:56 -0400
Subject: use fma if available

---
 utils/fast_sparse_vector.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'utils')

diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
index 590a60c4..6e2a77cd 100644
--- a/utils/fast_sparse_vector.h
+++ b/utils/fast_sparse_vector.h
@@ -384,13 +384,21 @@ class FastSparseVector {
   T dot(const std::vector<T>& v) const {
     T res = T();
     for (const_iterator it = begin(), e = end(); it != e; ++it)
+#if FP_FAST_FMA
+      if (static_cast<unsigned>(it->first) < v.size()) res = std::fma(it->second, v[it->first], res);
+#else
       if (static_cast<unsigned>(it->first) < v.size()) res += it->second * v[it->first];
+#endif
     return res;
   }
   T dot(const FastSparseVector<T>& other) const {
     T res = T();
     for (const_iterator it = begin(), e = end(); it != e; ++it)
+#if FP_FAST_FMA
+      res = std::fma(other.value(it->first), it->second, res);
+#else
       res += other.value(it->first) * it->second;
+#endif
     return res;
   }
   bool operator==(const FastSparseVector<T>& other) const {
-- 
cgit v1.2.3