summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- mteval/ns.h | 5
-rw-r--r-- training/kbest_repository.cc | 37
-rw-r--r-- training/kbest_repository.h | 19
3 files changed, 61 insertions, 0 deletions
diff --git a/mteval/ns.h b/mteval/ns.h
index 4e4c6975..ac7b0a23 100644
--- a/mteval/ns.h
+++ b/mteval/ns.h
@@ -56,6 +56,11 @@ class SufficientStats {
}
void Encode(std::string* out) const;
+ // Exchanges the contents of this object with `other` by swapping both
+ // data members (the score vector and the id string) in place.
+ void swap(SufficientStats& other) {
+   fields.swap(other.fields);
+   id_.swap(other.id_);
+ }
+
std::string id_;
std::vector<float> fields;
};
diff --git a/training/kbest_repository.cc b/training/kbest_repository.cc
new file mode 100644
index 00000000..145b40a2
--- /dev/null
+++ b/training/kbest_repository.cc
@@ -0,0 +1,37 @@
+#include "kbest_repository.h"
+
+#include <boost/functional/hash.hpp>
+
+using namespace std;
+
+// Hash functor for SparseVector<double>. Only the upper 32 bits of each
+// nonzero value's bit pattern take part in the hash, so two vectors with the
+// same keys whose values differ only in the lower 32 bits hash identically.
+// The code assumes size_t is 64 bits wide: it overlays a size_t on a double
+// and shifts it right by 32.
+struct ApproxVectorHasher {
+  // Selects the low 32 bits of a double's bit pattern.
+  static const size_t MASK = 0xFFFFFFFFull;
+  // Overlay used to read the bits of a double as an integer. Reading the
+  // member that was not written last relies on the compiler permitting
+  // union-based type punning (GCC documents this as supported).
+  union UType {
+    double f; // leave as double
+    size_t i;
+  };
+  // Returns x with the low 32 bits of its bit pattern zeroed, rounding to
+  // nearest: a low word of 2^31 or more bumps the pattern up to the next
+  // multiple of 2^32, anything smaller is truncated.
+  static inline double round(const double x) {
+    UType t;
+    t.f = x;
+    const size_t r = t.i & MASK;
+    if ((r << 1) > MASK) {
+      // Round up: adding (2^32 - r) clears the low word and carries into
+      // the upper word.
+      t.i += MASK - r + 1;
+    } else {
+      // Round down: clear the whole low word. The mask has to be ~MASK;
+      // (1ull - MASK) equals 0xFFFFFFFF00000002 and would keep bit 1.
+      t.i &= ~MASK;
+    }
+    return t.f;
+  }
+  // Combines, for every entry with a nonzero value, the entry's key and the
+  // upper 32 bits of the value's bit pattern into a running hash that starts
+  // from a fixed seed. Entries whose value compares equal to zero are
+  // skipped, so they do not influence the result.
+  size_t operator()(const SparseVector<double>& x) const {
+    size_t h = 0x573915839;
+    for (SparseVector<double>::const_iterator it = x.begin(); it != x.end(); ++it) {
+      UType t;
+      t.f = it->second;
+      if (t.f) {
+        const size_t z = (t.i >> 32);
+        boost::hash_combine(h, it->first);
+        boost::hash_combine(h, z);
+      }
+    }
+    return h;
+  }
+};
+
diff --git a/training/kbest_repository.h b/training/kbest_repository.h
new file mode 100644
index 00000000..0345394a
--- /dev/null
+++ b/training/kbest_repository.h
@@ -0,0 +1,19 @@
+#ifndef _KBEST_REPOSITORY_H_
+#define _KBEST_REPOSITORY_H_
+
+#include <vector>
+#include "wordid.h"
+#include "ns.h"
+#include "sparse_vector.h"
+
+// Holds a list of candidate hypotheses. As declared here the class exposes
+// no public interface yet; it only defines the per-hypothesis record and the
+// storage for those records.
+class KBestRepository {
+ private:
+  // Record kept for a single hypothesis.
+  struct HypInfo {
+    // Sequence of word ids for the hypothesis.
+    std::vector<WordID> words;
+    // Sparse vector of doubles (presumably the feature vector -- confirm).
+    SparseVector<double> x;
+    // Sufficient statistics object (type declared in ns.h).
+    SufficientStats score_stats;
+  };
+
+  // Every hypothesis record currently stored.
+  std::vector<HypInfo> candidates;
+};
+
+#endif