Paste: rolling checksum
Author: | crest |
Mode: | c |
Date: | Sun, 29 Jun 2014 23:12:57 |
Plain Text |
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sha512.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
struct window {
uint64_t hash;
__uint128_t buffer;
size_t pos;
};
int chunk(const char *path);
int
main(int argc, char **argv)
{
(void)argc;
(void)argv;
if ( argc == 1 ) {
chunk("/dev/stdin");
}
for ( int i = 1; i < argc; i++ ) {
chunk(argv[i]);
}
return 0;
}
static const uint32_t q = 0xFFFFFFFBul;
static inline bool
end_of_chunk(uint32_t hash)
{
return (hash & ((1 << 16) - 1)) == 0;
}
static int
dump_hash(const uint8_t *restrict buffer, size_t length)
{
SHA512_CTX ctx;
uint8_t hash[64];
uint8_t hash_hex[2 * 64 + 1];
SHA512_Init(&ctx);
SHA512_Update(&ctx, buffer, length);
SHA512_Final(hash, &ctx);
for ( size_t i = 0; i < sizeof(hash); i++ ) {
uint8_t high = hash[i] >> 4;
uint8_t low = hash[i] & 0xf;
hash_hex[2 * i ] = high > 9 ? high - 10 + 'A' : high + '0';
hash_hex[2 * i + 1] = low > 9 ? low - 10 + 'A' : low + '0';
}
hash_hex[2*64] = '\0';
if ( puts((const char*)hash_hex) == EOF ) {
return -1;
}
return 0;
}
int
chunk(const char *path)
{
struct stat sb;
size_t start = 0;
size_t window = 48;
int fd = open(path, O_RDONLY);
if ( fd < 0 ) {
return -1;
}
if ( fstat(fd, &sb) ) {
close(fd);
return -1;
}
size_t length = sb.st_size;
uint8_t *data = mmap(NULL, length, PROT_READ, MAP_ALIGNED_SUPER|MAP_NOCORE|MAP_PREFAULT_READ, fd, 0);
if ( data == NULL ) {
close(fd);
return -1;
}
(void)madvise(data, length, MADV_SEQUENTIAL);
if ( length < window ) {
dump_hash(data, length);
} else {
uint64_t pow = 1;
uint64_t hash = 0;
uint64_t temp;
for ( size_t i = 1; i < window; i++ ) {
pow *= 256;
pow %= q;
}
for ( size_t i = 0; i < window; i++ ) {
hash <<= 8;
hash |= data[i];
hash %= q;
}
for ( size_t i = 1; i <= length - window; i++ ) {
size_t length_of_chunk = i + 1 - start;
temp = pow;
temp *= data[i - 1];
temp %= q;
hash += q;
hash -= temp;
hash %= q;
hash <<= 8;
hash |= data[i + window - 1];
hash %= q;
if ( (end_of_chunk(hash) && length_of_chunk >= (1<<13)) || length_of_chunk >= (1<<16) ) {
dump_hash(data + start, length_of_chunk);
start += length_of_chunk;
}
}
if ( start != length ) {
dump_hash(data + start, length - start);
}
}
if ( munmap(data, sb.st_size) ) {
close(fd);
return -1;
}
if ( close(fd) ) {
return -1;
}
return 0;
}
New Annotation