Fast packed integers

% Remco Bloemen % 2010-08-04, last updated 2014-03-04

Unsigned integers

The encoding I implemented is the same as the one used in EBML. It works as follows

bits bytes pattern


7 1 1xxx xxxx 14 2 01xx xxxx xxxx xxxx 21 3 001x xxxx xxxx xxxx xxxx xxxx 28 4 0001 xxxx xxxx xxxx xxxx xxxx xxxx xxxx 35 5 0000 1xxx xxxx xxxx xxxx … xxxx xxxx xxxx 42 6 0000 01xx xxxx xxxx xxxx … xxxx xxxx xxxx 49 7 0000 001x xxxx xxxx xxxx … xxxx xxxx xxxx 56 8 0000 0001 xxxx xxxx xxxx … xxxx xxxx xxxx 64 9 0000 0000 xxxx xxxx xxxx … xxxx xxxx xxxx

Using a bit of

int length_uint(uint64 value)
{
	int num_bytes = (63 - count_leading_zeros(value)) / 7 + 1;
	return (num_bytes > 9) ? 9 : num_bytes;
}

uint64 read_uint(char* &buffer)
{
	uint64 value = *reinterpret_cast<uint64*>(buffer);
	value = endian_swap(value);
	int num_bytes = count_leading_zeros(value) + 1;
	buffer += num_bytes;
	if(fast_true(num_bytes < 9)) {
		value <<= num_bytes;
		value >>= 64 - num_bytes*7;
		return value;
	}
	value = *reinterpret_cast<uint64*>(buffer - 8);
	value = endian_swap(value);
	return value;
}

void write_uint(char* &buffer, uint64 value)
{
	int num_bytes = (63 - count_leading_zeros(value)) / 7 + 1;
	if(fast_true(num_bytes < 9)) {
		value |= ((uint64)1) << (num_bytes*7);
		value <<= (8 - num_bytes) * 8;
		value = endian_swap(value);
		*reinterpret_cast<uint64*>(buffer) = value;
		buffer += num_bytes;
		return;
	}
	*buffer = 0xFF;
	value = endian_swap(value);
	*reinterpret_cast<uint64*>(buffer + 1) = value;
	buffer += num_bytes;
}

inline int count_leading_zeros(uint64 value)
{
	if(fast_false(value == 0)) return 64;
	return __builtin_clzl(value);
}

inline uint64 endian_swap(uint64 value)
{
	return __builtin_bswap64(value);
}

Signed integers

Remco Bloemen
Math & Engineering
https://2π.com