From b9e6e7e24cc48021090b689e143288e2b7f2b5fc Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 7 Apr 2014 22:56:34 -0400 Subject: track node state for smarter union --- decoder/node_state_hash.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 decoder/node_state_hash.h (limited to 'decoder/node_state_hash.h') diff --git a/decoder/node_state_hash.h b/decoder/node_state_hash.h new file mode 100644 index 00000000..cdc05877 --- /dev/null +++ b/decoder/node_state_hash.h @@ -0,0 +1,36 @@ +#ifndef _NODE_STATE_HASH_ +#define _NODE_STATE_HASH_ + +#include +#include +#include "murmur_hash3.h" +#include "ffset.h" + +namespace cdec { + + struct FirstPassNode { + FirstPassNode(int cat, int i, int j, int pi, int pj) : lhs(cat), s(i), t(j), u(pi), v(pj) {} + int32_t lhs; + short s; + short t; + short u; + short v; + }; + + inline uint64_t HashNode(int cat, int i, int j, int pi, int pj) { + FirstPassNode fpn(cat, i, j, pi, pj); + return MurmurHash3_64(&fpn, sizeof(FirstPassNode), 2654435769U); + } + + inline uint64_t HashNode(uint64_t old_hash, const FFState& state) { + uint8_t buf[1024]; + std::memcpy(buf, &old_hash, sizeof(uint64_t)); + assert(state.size() < (1024u - sizeof(uint64_t))); + std::memcpy(&buf[sizeof(uint64_t)], state.begin(), state.size()); + return MurmurHash3_64(buf, sizeof(uint64_t) + state.size(), 2654435769U); + } + +} + +#endif + -- cgit v1.2.3 From 6b00a98deed65f8068bcc6f5b5bb3a3a7bd4cfa2 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 9 Apr 2014 01:06:33 -0400 Subject: don't hash on an internal id --- decoder/node_state_hash.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'decoder/node_state_hash.h') diff --git a/decoder/node_state_hash.h b/decoder/node_state_hash.h index cdc05877..9fc01a09 100644 --- a/decoder/node_state_hash.h +++ b/decoder/node_state_hash.h @@ -3,14 +3,19 @@ #include #include +#include "tdict.h" #include "murmur_hash3.h" #include "ffset.h" namespace cdec { struct FirstPassNode { - FirstPassNode(int cat, int i, int j, int pi, int pj) : lhs(cat), s(i), t(j), u(pi), v(pj) {} - int32_t lhs; + FirstPassNode(int cat, int i, int j, int pi, int pj) : s(i), t(j), u(pi), v(pj) { + memset(lhs, 0, 120); + unsigned it = 0; + for (auto& c : TD::Convert(-cat)) { lhs[it++] = c; if (it == 120) break; } + } + char lhs[120]; short s; short t; short u; @@ -23,6 +28,7 @@ namespace cdec { } inline uint64_t HashNode(uint64_t old_hash, const FFState& state) { + if (state.size() == 0) return old_hash; uint8_t buf[1024]; std::memcpy(buf, &old_hash, sizeof(uint64_t)); assert(state.size() < (1024u - sizeof(uint64_t))); -- cgit v1.2.3