// Provenance (recovered from the patch header this file was extracted from):
//   commit 47a656c0b6fdba8f91f2c5808234cbb1de682652
//   From: redpony
//   Date: Wed, 10 Nov 2010 02:02:04 +0000
//   Subject: new version of klm
//   git-svn-id: https://ws10smt.googlecode.com/svn/trunk@706 ec762483-ff6d-05da-a07a-a48fb63a330f
//   new file: klm/util/bit_packing.hh (index 00000000..422ed873), cgit v1.2.3
#ifndef UTIL_BIT_PACKING__
#define UTIL_BIT_PACKING__

/* Bit-level packing routines */

#include <assert.h>
#ifdef __APPLE__
#include <architecture/byte_order.h>
#else
#include <endian.h>
#endif

#include <inttypes.h>

#if __BYTE_ORDER != __LITTLE_ENDIAN
#error The bit aligned storage functions assume little endian architecture
#endif

namespace util {

/* WARNING WARNING WARNING:
 * The write functions assume that memory is zero initially.  This makes them
 * faster and is the appropriate case for mmapped language model construction.
 * These routines assume that unaligned access to uint64_t is fast and that
 * storage is little endian.  This is the case on x86_64.  It may not be the
 * case on 32-bit x86 but my target audience is large language models for which
 * 64-bit is necessary.
 *
 * Every routine reads or writes a full 8 bytes starting at base, so at least
 * 8 bytes must be accessible there even when the field itself is shorter.
 */

/* Read an integer of up to 57 bits that was stored with WriteInt57.
 *   base: address of the byte in which the field starts.
 *   bit:  offset of the field's least significant bit within that byte.
 *   mask: (1 << length) - 1 where length <= 57.
 * With bit <= 7 and length <= 57 the whole field fits in the single 64-bit
 * word loaded from base.
 */
inline uint64_t ReadInt57(const void *base, uint8_t bit, uint64_t mask) {
  const uint64_t word = *reinterpret_cast<const uint64_t *>(base);
  return (word >> bit) & mask;
}

/* Store value at the given bit offset from base.
 * Assumes value <= mask and mask == (1 << length) - 1 where length <= 57.
 * Assumes the memory is zero initially: the value is OR-ed into place, so any
 * bits already set in the destination field would corrupt the result.
 */
inline void WriteInt57(void *base, uint8_t bit, uint64_t value) {
  uint64_t *const word = reinterpret_cast<uint64_t *>(base);
  *word |= (value << bit);
}

namespace detail {
// Reinterprets the bit pattern of a 32-bit float as an integer and back.
typedef union { float f; uint32_t i; } FloatEnc;
}  // namespace detail

/* Read a 32-bit float stored with WriteFloat32 at the given bit offset. */
inline float ReadFloat32(const void *base, uint8_t bit) {
  detail::FloatEnc encoded;
  // Truncation to 32 bits discards whatever follows the float in the word.
  encoded.i = static_cast<uint32_t>(*reinterpret_cast<const uint64_t *>(base) >> bit);
  return encoded.f;
}

/* Store the raw 32 bits of value at the given bit offset.
 * Assumes the memory is zero initially.
 */
inline void WriteFloat32(void *base, uint8_t bit, float value) {
  detail::FloatEnc encoded;
  encoded.f = value;
  WriteInt57(base, bit, encoded.i);
}

/* Read a float stored with WriteNonPositiveFloat31.  Only the low 31 bits
 * were stored; the sign bit is forced on, so the result is never positive.
 */
inline float ReadNonPositiveFloat31(const void *base, uint8_t bit) {
  detail::FloatEnc encoded;
  encoded.i = static_cast<uint32_t>(*reinterpret_cast<const uint64_t *>(base) >> bit);
  // Sign bit set means negative.  This also overwrites the one stray bit of
  // the neighbouring field picked up by the 32-bit truncation above.
  encoded.i |= 0x80000000;
  return encoded.f;
}

/* Store a float known to be <= 0 in 31 bits by dropping its sign bit.
 * Assumes the memory is zero initially.
 */
inline void WriteNonPositiveFloat31(void *base, uint8_t bit, float value) {
  assert(value <= 0.0);
  detail::FloatEnc encoded;
  encoded.f = value;
  encoded.i &= ~0x80000000;
  WriteInt57(base, bit, encoded.i);
}

// Declared here, defined elsewhere.
void BitPackingSanity();

// Return bits required to store integers up to max_value.  Not the most
// efficient implementation, but this is only called a few times to size tries.
uint8_t RequiredBits(uint64_t max_value);

// Field width together with the matching mask, in the form ReadInt57 expects.
struct BitsMask {
  // Size the field so that it can hold any integer up to max_value.
  void FromMax(uint64_t max_value) {
    bits = RequiredBits(max_value);
    // The shift must be done in 64 bits: a plain int 1 overflows as soon as
    // bits reaches 31, while fields here may be up to 57 bits wide.
    // Assumes bits < 64.
    mask = (1ULL << bits) - 1;
  }
  uint8_t bits;
  uint64_t mask;
};

}  // namespace util

#endif  // UTIL_BIT_PACKING__