Paste: rolling checksum

Author: crest
Mode: c
Date: Sun, 29 Jun 2014 23:12:57
Plain Text |
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sha512.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

struct window {
	uint64_t    hash;
	__uint128_t buffer;
	size_t      pos;
};

int chunk(const char *path);

int
main(int argc, char **argv)
{
	(void)argc;
	(void)argv;

	if ( argc == 1 ) {
		chunk("/dev/stdin");
	}
	for ( int i = 1; i < argc; i++ ) {
		chunk(argv[i]);
	}
	return 0;
}

static const uint32_t q = 0xFFFFFFFBul;

/*static uint32_t init_hash(const uint8_t *restrict data, size_t length, uint32_t *restrict mult)
{
	uint64_t hash = 0;
        uint32_t pow  = 1;

	for ( size_t i = 0; i < length; i++ ) {
		pow = (pow * UINT8_MAX) % q;
	}
	for ( size_t i = 0; i < length; i++ ) {
		hash = ((hash << 8) + data[i]) % q;
	}
	*mult = _mult;

	return hash;
}

static uint32_t roll_hash(uint32_t hash, uint8_t *data, uint32_t mult, size_t i)
{
	hash -= mult * data[i - 1024];
	hash *= hconst;
	hash += data[i];

	return hash;
}
*/

static inline bool
end_of_chunk(uint32_t hash)
{
	return (hash & ((1 << 16) - 1)) == 0;
}

static int
dump_hash(const uint8_t *restrict buffer, size_t length)
{
	SHA512_CTX ctx;
	uint8_t    hash[64];
	uint8_t    hash_hex[2 * 64 + 1];

	SHA512_Init(&ctx);
	SHA512_Update(&ctx, buffer, length);
	SHA512_Final(hash, &ctx);

	for ( size_t i = 0; i < sizeof(hash); i++ ) {
		uint8_t high = hash[i] >> 4;
		uint8_t low  = hash[i] & 0xf;
        	hash_hex[2 * i    ] = high > 9 ? high - 10 + 'A' : high + '0';
        	hash_hex[2 * i + 1] = low  > 9 ? low  - 10 + 'A' : low  + '0';
	}
	hash_hex[2*64] = '\0';

	if ( puts((const char*)hash_hex) == EOF ) {
		return -1;
	}

	return 0;
}

int
chunk(const char *path)
{
	struct stat sb;
	size_t      start  = 0;
	size_t      window = 48;
	int         fd     = open(path, O_RDONLY);

	if ( fd < 0 ) {
		return -1;
	}

	if ( fstat(fd, &sb) ) {
		close(fd);
        	return -1;
	}
	size_t length = sb.st_size;

	uint8_t *data  = mmap(NULL, length, PROT_READ, MAP_ALIGNED_SUPER|MAP_NOCORE|MAP_PREFAULT_READ, fd, 0);
	if ( data == NULL ) {
		close(fd);
		return -1;
	}
	(void)madvise(data, length, MADV_SEQUENTIAL);

	if ( length < window ) { 
        	dump_hash(data, length);
	} else {
		uint64_t pow  = 1;
		uint64_t hash = 0;
		uint64_t temp;
        	for ( size_t i = 1; i < window; i++ ) {
                	pow *= 256;
			pow %= q;
		}
		for ( size_t i = 0; i < window; i++ ) {
                	hash <<= 8;
			hash |=  data[i];
			hash %=  q;
		}
		for ( size_t i = 1; i <= length - window; i++ ) {
			size_t length_of_chunk = i + 1 - start;

			temp  = pow;
			temp *= data[i - 1];
			temp %= q;
			
                	hash += q;
			hash -= temp;
			hash %= q;

			hash <<= 8;
			hash |=  data[i + window - 1];
			hash %=  q;

			if ( (end_of_chunk(hash) && length_of_chunk >= (1<<13)) || length_of_chunk >= (1<<16) ) {
                        	dump_hash(data + start, length_of_chunk);
				start += length_of_chunk;
			}
		}
		if ( start != length ) {
                	dump_hash(data + start, length - start);
		}
	}
	
	if ( munmap(data, sb.st_size) ) {
		close(fd);
		return -1;
	}

	if ( close(fd) ) {
		return -1;
	}

	return 0;
}

New Annotation

Summary:
Author:
Mode:
Body: