#ifndef UTIL_BIT_PACKING__ #define UTIL_BIT_PACKING__ /* Bit-level packing routines * * WARNING WARNING WARNING: * The write functions assume that memory is zero initially. This makes them * faster and is the appropriate case for mmapped language model construction. * These routines assume that unaligned access to uint64_t is fast. This is * the case on x86_64. I'm not sure how fast unaligned 64-bit access is on * x86 but my target audience is large language models for which 64-bit is * necessary. * * Call the BitPackingSanity function to sanity check. Calling once suffices, * but it may be called multiple times when that's inconvenient. * * ARM and MinGW ports contributed by Hideo Okuma and Tomoyuki Yoshimura at * NICT. */ #include <assert.h> #ifdef __APPLE__ #include <architecture/byte_order.h> #elif __linux__ #include <endian.h> #elif !defined(_WIN32) && !defined(_WIN64) #include <arpa/nameser_compat.h> #endif #include <stdint.h> #include <string.h> namespace util { // Fun fact: __BYTE_ORDER is wrong on Solaris Sparc, but the version without __ is correct. #if BYTE_ORDER == LITTLE_ENDIAN inline uint8_t BitPackShift(uint8_t bit, uint8_t /*length*/) { return bit; } #elif BYTE_ORDER == BIG_ENDIAN inline uint8_t BitPackShift(uint8_t bit, uint8_t length) { return 64 - length - bit; } #else #error "Bit packing code isn't written for your byte order." #endif inline uint64_t ReadOff(const void *base, uint64_t bit_off) { #if defined(__arm) || defined(__arm__) const uint8_t *base_off = reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3); uint64_t value64; memcpy(&value64, base_off, sizeof(value64)); return value64; #else return *reinterpret_cast<const uint64_t*>(reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3)); #endif } /* Pack integers up to 57 bits using their least significant digits. * The length is specified using mask: * Assumes mask == (1 << length) - 1 where length <= 57. */ inline uint64_t ReadInt57(const void *base, uint64_t bit_off, uint8_t length, uint64_t mask) { return (ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, length)) & mask; } /* Assumes value < (1 << length) and length <= 57. * Assumes the memory is zero initially. */ inline void WriteInt57(void *base, uint64_t bit_off, uint8_t length, uint64_t value) { #if defined(__arm) || defined(__arm__) uint8_t *base_off = reinterpret_cast<uint8_t*>(base) + (bit_off >> 3); uint64_t value64; memcpy(&value64, base_off, sizeof(value64)); value64 |= (value << BitPackShift(bit_off & 7, length)); memcpy(base_off, &value64, sizeof(value64)); #else *reinterpret_cast<uint64_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |= (value << BitPackShift(bit_off & 7, length)); #endif } /* Same caveats as above, but for a 25 bit limit. */ inline uint32_t ReadInt25(const void *base, uint64_t bit_off, uint8_t length, uint32_t mask) { #if defined(__arm) || defined(__arm__) const uint8_t *base_off = reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3); uint32_t value32; memcpy(&value32, base_off, sizeof(value32)); return (value32 >> BitPackShift(bit_off & 7, length)) & mask; #else return (*reinterpret_cast<const uint32_t*>(reinterpret_cast<const uint8_t*>(base) + (bit_off >> 3)) >> BitPackShift(bit_off & 7, length)) & mask; #endif } inline void WriteInt25(void *base, uint64_t bit_off, uint8_t length, uint32_t value) { #if defined(__arm) || defined(__arm__) uint8_t *base_off = reinterpret_cast<uint8_t*>(base) + (bit_off >> 3); uint32_t value32; memcpy(&value32, base_off, sizeof(value32)); value32 |= (value << BitPackShift(bit_off & 7, length)); memcpy(base_off, &value32, sizeof(value32)); #else *reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |= (value << BitPackShift(bit_off & 7, length)); #endif } typedef union { float f; uint32_t i; } FloatEnc; inline float ReadFloat32(const void *base, uint64_t bit_off) { FloatEnc encoded; encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 32); return encoded.f; } inline void WriteFloat32(void *base, uint64_t bit_off, float value) { FloatEnc encoded; encoded.f = value; WriteInt57(base, bit_off, 32, encoded.i); } const uint32_t kSignBit = 0x80000000; inline void SetSign(float &to) { FloatEnc enc; enc.f = to; enc.i |= kSignBit; to = enc.f; } inline void UnsetSign(float &to) { FloatEnc enc; enc.f = to; enc.i &= ~kSignBit; to = enc.f; } inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) { FloatEnc encoded; encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31); // Sign bit set means negative. encoded.i |= kSignBit; return encoded.f; } inline void WriteNonPositiveFloat31(void *base, uint64_t bit_off, float value) { FloatEnc encoded; encoded.f = value; encoded.i &= ~kSignBit; WriteInt57(base, bit_off, 31, encoded.i); } void BitPackingSanity(); // Return bits required to store integers upto max_value. Not the most // efficient implementation, but this is only called a few times to size tries. uint8_t RequiredBits(uint64_t max_value); struct BitsMask { static BitsMask ByMax(uint64_t max_value) { BitsMask ret; ret.FromMax(max_value); return ret; } static BitsMask ByBits(uint8_t bits) { BitsMask ret; ret.bits = bits; ret.mask = (1ULL << bits) - 1; return ret; } void FromMax(uint64_t max_value) { bits = RequiredBits(max_value); mask = (1ULL << bits) - 1; } uint8_t bits; uint64_t mask; }; struct BitAddress { BitAddress(void *in_base, uint64_t in_offset) : base(in_base), offset(in_offset) {} void *base; uint64_t offset; }; } // namespace util #endif // UTIL_BIT_PACKING__