#include #include #include #include #include #include #include #include #include #include #include struct window { uint64_t hash; __uint128_t buffer; size_t pos; }; int chunk(const char *path); int main(int argc, char **argv) { (void)argc; (void)argv; if ( argc == 1 ) { chunk("/dev/stdin"); } for ( int i = 1; i < argc; i++ ) { chunk(argv[i]); } return 0; } static const uint32_t q = 0xFFFFFFFBul; /*static uint32_t init_hash(const uint8_t *restrict data, size_t length, uint32_t *restrict mult) { uint64_t hash = 0; uint32_t pow = 1; for ( size_t i = 0; i < length; i++ ) { pow = (pow * UINT8_MAX) % q; } for ( size_t i = 0; i < length; i++ ) { hash = ((hash << 8) + data[i]) % q; } *mult = _mult; return hash; } static uint32_t roll_hash(uint32_t hash, uint8_t *data, uint32_t mult, size_t i) { hash -= mult * data[i - 1024]; hash *= hconst; hash += data[i]; return hash; } */ static inline bool end_of_chunk(uint32_t hash) { return (hash & ((1 << 16) - 1)) == 0; } static int dump_hash(const uint8_t *restrict buffer, size_t length) { SHA512_CTX ctx; uint8_t hash[64]; uint8_t hash_hex[2 * 64 + 1]; SHA512_Init(&ctx); SHA512_Update(&ctx, buffer, length); SHA512_Final(hash, &ctx); for ( size_t i = 0; i < sizeof(hash); i++ ) { uint8_t high = hash[i] >> 4; uint8_t low = hash[i] & 0xf; hash_hex[2 * i ] = high > 9 ? high - 10 + 'A' : high + '0'; hash_hex[2 * i + 1] = low > 9 ? low - 10 + 'A' : low + '0'; } hash_hex[2*64] = '\0'; if ( puts((const char*)hash_hex) == EOF ) { return -1; } return 0; } int chunk(const char *path) { struct stat sb; size_t start = 0; size_t window = 48; int fd = open(path, O_RDONLY); if ( fd < 0 ) { return -1; } if ( fstat(fd, &sb) ) { close(fd); return -1; } size_t length = sb.st_size; uint8_t *data = mmap(NULL, length, PROT_READ, MAP_ALIGNED_SUPER|MAP_NOCORE|MAP_PREFAULT_READ, fd, 0); if ( data == NULL ) { close(fd); return -1; } (void)madvise(data, length, MADV_SEQUENTIAL); if ( length < window ) { dump_hash(data, length); } else { uint64_t pow = 1; uint64_t hash = 0; uint64_t temp; for ( size_t i = 1; i < window; i++ ) { pow *= 256; pow %= q; } for ( size_t i = 0; i < window; i++ ) { hash <<= 8; hash |= data[i]; hash %= q; } for ( size_t i = 1; i <= length - window; i++ ) { size_t length_of_chunk = i + 1 - start; temp = pow; temp *= data[i - 1]; temp %= q; hash += q; hash -= temp; hash %= q; hash <<= 8; hash |= data[i + window - 1]; hash %= q; if ( (end_of_chunk(hash) && length_of_chunk >= (1<<13)) || length_of_chunk >= (1<<16) ) { dump_hash(data + start, length_of_chunk); start += length_of_chunk; } } if ( start != length ) { dump_hash(data + start, length - start); } } if ( munmap(data, sb.st_size) ) { close(fd); return -1; } if ( close(fd) ) { return -1; } return 0; }