simplify std.hash.Adler32

This commit is contained in:
Andrew Kelley 2025-07-27 14:10:55 -07:00
parent a4f05a4588
commit 73c98ca0e6
4 changed files with 120 additions and 138 deletions

View File

@ -1,5 +1,4 @@
const adler = @import("hash/adler.zig");
pub const Adler32 = adler.Adler32;
pub const Adler32 = @import("hash/Adler32.zig");
const auto_hash = @import("hash/auto_hash.zig");
pub const autoHash = auto_hash.autoHash;
@ -116,7 +115,7 @@ test int {
}
test {
_ = adler;
_ = Adler32;
_ = auto_hash;
_ = crc;
_ = fnv;

117
lib/std/hash/Adler32.zig Normal file
View File

@ -0,0 +1,117 @@
//! https://tools.ietf.org/html/rfc1950#section-9
//! https://github.com/madler/zlib/blob/master/adler32.c
const Adler32 = @This();
const std = @import("std");
const testing = std.testing;
adler: u32 = 1,
/// Folds `input` into an Adler-32 `state` and returns the updated state.
///
/// The low 16 bits of `state` hold the running byte sum and the high 16 bits
/// hold the running sum of those sums; both are reduced modulo 65521 before
/// being packed back into the returned `u32`.
pub fn permute(state: u32, input: []const u8) u32 {
    const modulus = 65521;
    // Number of bytes accumulated between modulo reductions. Presumably the
    // zlib NMAX bound that keeps the u32 accumulators from overflowing; see
    // zlib's adler32.c to confirm. It is a multiple of `stride`.
    const block_len = 5552;
    const stride = 16;

    var lo: u32 = state & 0xffff;
    var hi: u32 = (state >> 16) & 0xffff;

    switch (input.len) {
        // Single byte: a conditional subtraction replaces the modulo.
        1 => {
            lo +%= input[0];
            if (lo >= modulus) lo -= modulus;
            hi +%= lo;
            if (hi >= modulus) hi -= modulus;
        },
        // Empty input and inputs shorter than one stride.
        0, 2...15 => {
            for (input) |byte| {
                lo +%= byte;
                hi +%= lo;
            }
            if (lo >= modulus) lo -= modulus;
            hi %= modulus;
        },
        else => {
            var rest = input;

            // Whole blocks: accumulate `block_len` bytes, then reduce once.
            while (rest.len >= block_len) : (rest = rest[block_len..]) {
                var chunk: []const u8 = rest[0..block_len];
                while (chunk.len >= stride) : (chunk = chunk[stride..]) {
                    inline for (0..stride) |k| {
                        lo +%= chunk[k];
                        hi +%= lo;
                    }
                }
                lo %= modulus;
                hi %= modulus;
            }

            // Tail shorter than a block: unrolled strides, then single bytes.
            if (rest.len != 0) {
                while (rest.len >= stride) : (rest = rest[stride..]) {
                    inline for (0..stride) |k| {
                        lo +%= rest[k];
                        hi +%= lo;
                    }
                }
                for (rest) |byte| {
                    lo +%= byte;
                    hi +%= lo;
                }
                lo %= modulus;
                hi %= modulus;
            }
        },
    }

    return lo | (hi << 16);
}
/// Feeds `input` into the running checksum stored in `a.adler`.
pub fn update(a: *Adler32, input: []const u8) void {
    const next = permute(a.adler, input);
    a.adler = next;
}
/// Computes the checksum of `input` in one shot, starting from state 1.
pub fn hash(input: []const u8) u32 {
    const initial_state: u32 = 1;
    return permute(initial_state, input);
}
// Known-answer checks for two short inputs.
test "sanity" {
    const cases = [_]struct { input: []const u8, expected: u32 }{
        .{ .input = "a", .expected = 0x620062 },
        .{ .input = "example", .expected = 0xbc002ed },
    };
    for (cases) |case| {
        try testing.expectEqual(case.expected, hash(case.input));
    }
}
// Known-answer checks for 1024 and 1025 bytes of value 1.
test "long" {
    const ones = [_]u8{1} ** 1025;

    const expected_1024: u32 = 0x06780401;
    try testing.expectEqual(expected_1024, hash(ones[0..1024]));

    const expected_1025: u32 = 0x0a7a0402;
    try testing.expectEqual(expected_1025, hash(&ones));
}
// Known-answer check for 5553 bytes of value 1, one byte more than the
// 5552-byte block size used by `permute`.
test "very long" {
    const ones = [_]u8{1} ** 5553;
    const expected: u32 = 0x707f15b2;
    try testing.expectEqual(expected, hash(&ones));
}
// Known-answer check for 6000 bytes where byte `i` is `i` truncated to u8.
test "very long with variation" {
    const bytes = comptime init: {
        // The fill loop below runs 6000 iterations at comptime.
        @setEvalBranchQuota(7000);
        var table: [6000]u8 = undefined;
        for (&table, 0..) |*slot, index| {
            slot.* = @truncate(index);
        }
        break :init table;
    };
    try testing.expectEqual(@as(u32, 0x5af38d6e), hash(&bytes));
}

View File

@ -1,134 +0,0 @@
// Adler32 checksum.
//
// https://tools.ietf.org/html/rfc1950#section-9
// https://github.com/madler/zlib/blob/master/adler32.c
const std = @import("std");
const testing = std.testing;
/// Adler-32 checksum with an incremental `init` / `update` / `final` API and a
/// one-shot `hash` helper.
pub const Adler32 = struct {
    /// Modulus applied to both running sums.
    const base = 65521;
    /// Number of bytes accumulated between modulo reductions; a multiple of 16.
    const nmax = 5552;

    /// Packed state: low 16 bits are the byte sum, high 16 bits the sum of sums.
    adler: u32,

    /// Returns a fresh checksum state (value 1).
    pub fn init() Adler32 {
        return .{ .adler = 1 };
    }

    /// Feeds `input` into the checksum.
    ///
    /// This fast variant is taken from zlib: it defers the modulo reductions
    /// and unrolls the inner loop for longer inputs.
    pub fn update(self: *Adler32, input: []const u8) void {
        var low: u32 = self.adler & 0xffff;
        var high: u32 = (self.adler >> 16) & 0xffff;

        if (input.len >= 16) {
            var remaining = input;

            // Whole `nmax`-byte blocks, reduced once per block.
            while (remaining.len >= nmax) : (remaining = remaining[nmax..]) {
                var chunk: []const u8 = remaining[0..nmax];
                while (chunk.len >= 16) : (chunk = chunk[16..]) {
                    inline for (0..16) |k| {
                        low +%= chunk[k];
                        high +%= low;
                    }
                }
                low %= base;
                high %= base;
            }

            // Tail shorter than a block: unrolled 16-byte steps, then bytes.
            if (remaining.len != 0) {
                while (remaining.len >= 16) : (remaining = remaining[16..]) {
                    inline for (0..16) |k| {
                        low +%= remaining[k];
                        high +%= low;
                    }
                }
                for (remaining) |byte| {
                    low +%= byte;
                    high +%= low;
                }
                low %= base;
                high %= base;
            }
        } else if (input.len == 1) {
            // Single byte: conditional subtraction replaces the modulo.
            low +%= input[0];
            if (low >= base) low -= base;
            high +%= low;
            if (high >= base) high -= base;
        } else {
            // Empty input or fewer than 16 bytes.
            for (input) |byte| {
                low +%= byte;
                high +%= low;
            }
            if (low >= base) low -= base;
            high %= base;
        }

        self.adler = low | (high << 16);
    }

    /// Returns the current checksum value without modifying the state.
    pub fn final(self: *Adler32) u32 {
        return self.adler;
    }

    /// Computes the checksum of `input` in one shot.
    pub fn hash(input: []const u8) u32 {
        var state = Adler32.init();
        state.update(input);
        return state.final();
    }
};
// Known-answer checks for two short inputs.
test "adler32 sanity" {
    const cases = [_]struct { input: []const u8, expected: u32 }{
        .{ .input = "a", .expected = 0x620062 },
        .{ .input = "example", .expected = 0xbc002ed },
    };
    for (cases) |case| {
        try testing.expectEqual(case.expected, Adler32.hash(case.input));
    }
}
// Known-answer checks for 1024 and 1025 bytes of value 1.
test "adler32 long" {
    const ones = [_]u8{1} ** 1025;

    const expected_1024: u32 = 0x06780401;
    try testing.expectEqual(expected_1024, Adler32.hash(ones[0..1024]));

    const expected_1025: u32 = 0x0a7a0402;
    try testing.expectEqual(expected_1025, Adler32.hash(&ones));
}
// Known-answer check for 5553 bytes of value 1, one byte more than `nmax`.
test "adler32 very long" {
    const ones = [_]u8{1} ** 5553;
    const expected: u32 = 0x707f15b2;
    try testing.expectEqual(expected, Adler32.hash(&ones));
}
// Known-answer check for 6000 bytes where byte `i` is `i` truncated to u8.
// Note that this goes through `std.hash.Adler32` rather than the local decl.
test "adler32 very long with variation" {
    const bytes = comptime init: {
        // The fill loop below runs 6000 iterations at comptime.
        @setEvalBranchQuota(7000);
        var table: [6000]u8 = undefined;
        for (&table, 0..) |*slot, index| {
            slot.* = @truncate(index);
        }
        break :init table;
    };
    try testing.expectEqual(@as(u32, 0x5af38d6e), std.hash.Adler32.hash(&bytes));
}
const verify = @import("verify.zig");
// Runs the shared `verify.iterativeApi` check against the `Adler32` type.
// NOTE(review): presumably this compares incremental `update` calls with the
// one-shot `hash`; confirm against verify.zig.
test "adler32 iterative" {
try verify.iterativeApi(Adler32);
}

View File

@ -45,7 +45,7 @@ pub fn smhasher(comptime hash_fn: anytype) u32 {
pub fn iterativeApi(comptime Hash: anytype) !void {
// Sum(1..32) = 528
var buf: [528]u8 = [_]u8{0} ** 528;
var buf: [528]u8 = @splat(0);
var len: usize = 0;
const seed = 0;