From fc936db02d42cc3978a4cc2017efe7a15c78855d Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Sun, 27 May 2012 22:28:34 -0400
Subject: kbest repo

---
Review notes (text between the three-dash line and the first diff is not
part of the commit):
- training/kbest_repository.cc: ApproxVectorHasher::round() cleared the low
  word with (1ull - MASK), i.e. 0xFFFFFFFF00000002, which leaves bit 1 of the
  low 32 bits set. It now uses ~MASK so both branches zero the low word.
- Explanatory comments were added to the new code; hunk sizes and the
  diffstat below reflect that.
- The copy under review had lost its line breaks, its indentation and every
  token written in angle brackets. <boost/functional/hash.hpp> and <vector>
  follow from the names the code uses; the template arguments <float>,
  <double>, <WordID> and <HypInfo>, and the ns.h context lines, are
  assumptions -- confirm them against the tree before applying.
- The blob ids on the index lines are those of the original commit.

 mteval/ns.h                  |  6 ++++++
 training/kbest_repository.cc | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 training/kbest_repository.h  | 22 ++++++++++++++++++++++
 3 files changed, 74 insertions(+)
 create mode 100644 training/kbest_repository.cc
 create mode 100644 training/kbest_repository.h

diff --git a/mteval/ns.h b/mteval/ns.h
index 4e4c6975..ac7b0a23 100644
--- a/mteval/ns.h
+++ b/mteval/ns.h
@@ -56,6 +56,12 @@ class SufficientStats {
   }
   void Encode(std::string* out) const;
 
+  // Exchanges id_ and fields with those of other.
+  void swap(SufficientStats& other) {
+    id_.swap(other.id_);
+    fields.swap(other.fields);
+  }
+
   std::string id_;
   std::vector<float> fields;
 };
diff --git a/training/kbest_repository.cc b/training/kbest_repository.cc
new file mode 100644
index 00000000..145b40a2
--- /dev/null
+++ b/training/kbest_repository.cc
@@ -0,0 +1,46 @@
+#include "kbest_repository.h"
+
+#include <boost/functional/hash.hpp>
+
+using namespace std;
+
+// Hashes a sparse vector from its keys and the upper 32 bits of each
+// non-zero value, so values whose bit patterns differ only in the low
+// 32 bits hash identically.  The shifts and masks assume a 64-bit size_t.
+struct ApproxVectorHasher {
+  // Selects the low 32 bits of a word.
+  static const size_t MASK = 0xFFFFFFFFull;
+  // Exposes the bit pattern of a double as an integer.
+  union UType {
+    double f;   // leave as double
+    size_t i;
+  };
+  // Returns x with the low 32 bits of its bit pattern cleared, first adding
+  // one to the upper bits when the low 32 bits are at least 0x80000000.
+  static inline double round(const double x) {
+    UType t;
+    t.f = x;
+    size_t r = t.i & MASK;
+    if ((r << 1) > MASK)
+      t.i += MASK - r + 1;  // low word wraps to 0 and carries upward
+    else
+      t.i &= ~MASK;         // clear the entire low word
+    return t.f;
+  }
+  // Folds the key and the upper 32 value bits of every non-zero entry of x
+  // into the hash; entries whose value is zero are skipped.
+  size_t operator()(const SparseVector<double>& x) const {
+    size_t h = 0x573915839;
+    for (SparseVector<double>::const_iterator it = x.begin(); it != x.end(); ++it) {
+      UType t;
+      t.f = it->second;
+      if (t.f) {
+        size_t z = (t.i >> 32);
+        boost::hash_combine(h, it->first);
+        boost::hash_combine(h, z);
+      }
+    }
+    return h;
+  }
+};
+
diff --git a/training/kbest_repository.h b/training/kbest_repository.h
new file mode 100644
index 00000000..0345394a
--- /dev/null
+++ b/training/kbest_repository.h
@@ -0,0 +1,22 @@
+#ifndef _KBEST_REPOSITORY_H_
+#define _KBEST_REPOSITORY_H_
+
+#include <vector>
+#include "wordid.h"
+#include "ns.h"
+#include "sparse_vector.h"
+
+// Container for a list of candidate hypotheses.  Every member is private and
+// no public interface is declared yet.
+class KBestRepository {
+  // One candidate: a word sequence, a sparse vector and a SufficientStats.
+  struct HypInfo {
+    std::vector<WordID> words;
+    SparseVector<double> x;
+    SufficientStats score_stats;
+  };
+
+  std::vector<HypInfo> candidates;
+};
+
+#endif
-- 
cgit v1.2.3