Fast packed integers
% Remco Bloemen % 2010-08-04, last updated 2014-03-04
Unsigned integers
The encoding I implemented is the same as the one used in EBML. It works as follows
bits bytes pattern
7 1 1xxx xxxx
14 2 01xx xxxx xxxx xxxx
21 3 001x xxxx xxxx xxxx xxxx xxxx
28 4 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx
35 5 0000 1xxx xxxx xxxx xxxx … xxxx xxxx xxxx
42 6 0000 01xx xxxx xxxx xxxx … xxxx xxxx xxxx
49 7 0000 001x xxxx xxxx xxxx … xxxx xxxx xxxx
56 8 0000 0001 xxxx xxxx xxxx … xxxx xxxx xxxx
64 9 0000 0000 xxxx xxxx xxxx … xxxx xxxx xxxx
Using a bit of
int length_uint(uint64 value)
{
int num_bytes = (63 - count_leading_zeros(value)) / 7 + 1;
return (num_bytes > 9) ? 9 : num_bytes;
}
uint64 read_uint(char* &buffer)
{
uint64 value = *reinterpret_cast<uint64*>(buffer);
value = endian_swap(value);
int num_bytes = count_leading_zeros(value) + 1;
buffer += num_bytes;
if(fast_true(num_bytes < 9)) {
value <<= num_bytes;
value >>= 64 - num_bytes*7;
return value;
}
value = *reinterpret_cast<uint64*>(buffer - 8);
value = endian_swap(value);
return value;
}
void write_uint(char* &buffer, uint64 value)
{
int num_bytes = (63 - count_leading_zeros(value)) / 7 + 1;
if(fast_true(num_bytes < 9)) {
value |= ((uint64)1) << (num_bytes*7);
value <<= (8 - num_bytes) * 8;
value = endian_swap(value);
*reinterpret_cast<uint64*>(buffer) = value;
buffer += num_bytes;
return;
}
*buffer = 0xFF;
value = endian_swap(value);
*reinterpret_cast<uint64*>(buffer + 1) = value;
buffer += num_bytes;
}
inline int count_leading_zeros(uint64 value)
{
if(fast_false(value == 0)) return 64;
return __builtin_clzl(value);
}
inline uint64 endian_swap(uint64 value)
{
return __builtin_bswap64(value);
}