From c050ec4e570caf53be924bcbbe4194512b6a022c Mon Sep 17 00:00:00 2001 From: Marc Tiehuis Date: Wed, 21 Aug 2019 20:34:12 +1200 Subject: [PATCH 1/4] Update hash/crypto benchmark scripts --- .../{throughput_test.zig => benchmark.zig} | 19 +++-- std/crypto/blake2.zig | 4 +- std/crypto/sha2.zig | 4 +- .../{throughput_test.zig => benchmark.zig} | 70 +++++++++++++++++-- 4 files changed, 81 insertions(+), 16 deletions(-) rename std/crypto/{throughput_test.zig => benchmark.zig} (91%) rename std/hash/{throughput_test.zig => benchmark.zig} (61%) diff --git a/std/crypto/throughput_test.zig b/std/crypto/benchmark.zig similarity index 91% rename from std/crypto/throughput_test.zig rename to std/crypto/benchmark.zig index aee06571a0..ae5087a847 100644 --- a/std/crypto/throughput_test.zig +++ b/std/crypto/benchmark.zig @@ -1,8 +1,10 @@ +// zig run benchmark.zig --release-fast --override-std-dir .. + const builtin = @import("builtin"); -const std = @import("std"); +const std = @import("../std.zig"); const time = std.time; const Timer = time.Timer; -const crypto = @import("../crypto.zig"); +const crypto = std.crypto; const KiB = 1024; const MiB = 1024 * KiB; @@ -14,7 +16,7 @@ const Crypto = struct { name: []const u8, }; -const hashes = []Crypto{ +const hashes = [_]Crypto{ Crypto{ .ty = crypto.Md5, .name = "md5" }, Crypto{ .ty = crypto.Sha1, .name = "sha1" }, Crypto{ .ty = crypto.Sha256, .name = "sha256" }, @@ -45,7 +47,7 @@ pub fn benchmarkHash(comptime Hash: var, comptime bytes: comptime_int) !u64 { return throughput; } -const macs = []Crypto{ +const macs = [_]Crypto{ Crypto{ .ty = crypto.Poly1305, .name = "poly1305" }, Crypto{ .ty = crypto.HmacMd5, .name = "hmac-md5" }, Crypto{ .ty = crypto.HmacSha1, .name = "hmac-sha1" }, @@ -75,7 +77,7 @@ pub fn benchmarkMac(comptime Mac: var, comptime bytes: comptime_int) !u64 { return throughput; } -const exchanges = []Crypto{Crypto{ .ty = crypto.X25519, .name = "x25519" }}; +const exchanges = [_]Crypto{Crypto{ .ty = crypto.X25519, .name = 
"x25519" }}; pub fn benchmarkKeyExchange(comptime DhKeyExchange: var, comptime exchange_count: comptime_int) !u64 { std.debug.assert(DhKeyExchange.minimum_key_length >= DhKeyExchange.secret_length); @@ -135,13 +137,16 @@ pub fn main() !void { var buffer: [1024]u8 = undefined; var fixed = std.heap.FixedBufferAllocator.init(buffer[0..]); - const args = try std.os.argsAlloc(&fixed.allocator); + const args = try std.process.argsAlloc(&fixed.allocator); var filter: ?[]u8 = ""; var i: usize = 1; while (i < args.len) : (i += 1) { - if (std.mem.eql(u8, args[i], "--seed")) { + if (std.mem.eql(u8, args[i], "--mode")) { + try stdout.print("{}\n", builtin.mode); + return; + } else if (std.mem.eql(u8, args[i], "--seed")) { i += 1; if (i == args.len) { usage(); diff --git a/std/crypto/blake2.zig b/std/crypto/blake2.zig index 2a2236dfd7..6bb2764b92 100644 --- a/std/crypto/blake2.zig +++ b/std/crypto/blake2.zig @@ -269,8 +269,8 @@ pub const Blake2b512 = Blake2b(512); fn Blake2b(comptime out_len: usize) type { return struct { const Self = @This(); - const block_length = 128; - const digest_length = out_len / 8; + pub const block_length = 128; + pub const digest_length = out_len / 8; const iv = [8]u64{ 0x6a09e667f3bcc908, diff --git a/std/crypto/sha2.zig b/std/crypto/sha2.zig index c1f7bd228d..b40a39d579 100644 --- a/std/crypto/sha2.zig +++ b/std/crypto/sha2.zig @@ -420,8 +420,8 @@ pub const Sha512 = Sha2_64(Sha512Params); fn Sha2_64(comptime params: Sha2Params64) type { return struct { const Self = @This(); - const block_length = 128; - const digest_length = params.out_len / 8; + pub const block_length = 128; + pub const digest_length = params.out_len / 8; s: [8]u64, // Streaming Cache diff --git a/std/hash/throughput_test.zig b/std/hash/benchmark.zig similarity index 61% rename from std/hash/throughput_test.zig rename to std/hash/benchmark.zig index 4b7e8ef344..b42a34ef31 100644 --- a/std/hash/throughput_test.zig +++ b/std/hash/benchmark.zig @@ -1,3 +1,5 @@ +// zig run 
benchmark.zig --release-fast --override-std-dir .. + const builtin = @import("builtin"); const std = @import("std"); const time = std.time; @@ -32,6 +34,8 @@ const Result = struct { throughput: u64, }; +const block_size: usize = 8192; + pub fn benchmarkHash(comptime H: var, bytes: usize) !Result { var h = blk: { if (H.init_u8s) |init| { @@ -43,7 +47,7 @@ pub fn benchmarkHash(comptime H: var, bytes: usize) !Result { break :blk H.ty.init(); }; - var block: [8192]u8 = undefined; + var block: [block_size]u8 = undefined; prng.random.bytes(block[0..]); var offset: usize = 0; @@ -63,6 +67,43 @@ pub fn benchmarkHash(comptime H: var, bytes: usize) !Result { }; } +pub fn benchmarkHashSmallKeys(comptime H: var, key_size: usize, bytes: usize) !Result { + const key_count = bytes / key_size; + var block: [block_size]u8 = undefined; + prng.random.bytes(block[0..]); + + var i: usize = 0; + var timer = try Timer.start(); + const start = timer.lap(); + + var sum: u64 = 0; + while (i < key_count) : (i += 1) { + const o = i % (block_size - key_size); + const small_key = block[o .. 
key_size + o]; + + const result = blk: { + if (H.init_u8s) |init| { + break :blk H.ty.hash(init, small_key); + } + if (H.init_u64) |init| { + break :blk H.ty.hash(init, small_key); + } + break :blk H.ty.hash(small_key); + }; + + sum +%= result; + } + const end = timer.read(); + + const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s; + const throughput = @floatToInt(u64, @intToFloat(f64, bytes) / elapsed_s); + + return Result{ + .hash = sum, + .throughput = throughput, + }; +} + fn usage() void { std.debug.warn( \\throughput_test [options] @@ -100,10 +141,14 @@ pub fn main() !void { var filter: ?[]u8 = ""; var count: usize = mode(128 * MiB); + var key_size: usize = 32; var i: usize = 1; while (i < args.len) : (i += 1) { - if (std.mem.eql(u8, args[i], "--seed")) { + if (std.mem.eql(u8, args[i], "--mode")) { + try stdout.print("{}\n", builtin.mode); + return; + } else if (std.mem.eql(u8, args[i], "--seed")) { i += 1; if (i == args.len) { usage(); @@ -127,8 +172,20 @@ pub fn main() !void { std.os.exit(1); } - const c = try std.fmt.parseUnsigned(u32, args[i], 10); + const c = try std.fmt.parseUnsigned(usize, args[i], 10); count = c * MiB; + } else if (std.mem.eql(u8, args[i], "--key-size")) { + i += 1; + if (i == args.len) { + usage(); + std.os.exit(1); + } + + key_size = try std.fmt.parseUnsigned(usize, args[i], 10); + if (key_size > block_size) { + try stdout.print("key_size cannot exceed block size of {}\n", block_size); + std.os.exit(1); + } } else if (std.mem.eql(u8, args[i], "--help")) { usage(); return; @@ -141,8 +198,11 @@ pub fn main() !void { inline for (hashes) |H| { if (filter == null or std.mem.indexOf(u8, H.name, filter.?) 
!= null) { const result = try benchmarkHash(H, count); - try printPad(stdout, H.name); - try stdout.print(": {:4} MiB/s [{:16}]\n", result.throughput / (1 * MiB), result.hash); + const result_small = try benchmarkHashSmallKeys(H, key_size, count); + + try stdout.print("{}\n", H.name); + try stdout.print(" iterative: {:4} MiB/s [{x:0<16}]\n", result.throughput / (1 * MiB), result.hash); + try stdout.print(" small keys: {:4} MiB/s [{x:0<16}]\n", result_small.throughput / (1 * MiB), result_small.hash); } } } From 48410943cbcbf53ae3c9895b2452218d6eacbc4b Mon Sep 17 00:00:00 2001 From: Marc Tiehuis Date: Wed, 21 Aug 2019 20:46:15 +1200 Subject: [PATCH 2/4] Add more hash functions to benchmark scripts Changed CRC api so the polynomial is specified as an enum for simpler construction. --- std/hash/benchmark.zig | 90 ++++++++++++++++++++++++++++++++++-------- std/hash/crc.zig | 26 ++++++------ 2 files changed, 86 insertions(+), 30 deletions(-) diff --git a/std/hash/benchmark.zig b/std/hash/benchmark.zig index b42a34ef31..ca948d21bf 100644 --- a/std/hash/benchmark.zig +++ b/std/hash/benchmark.zig @@ -15,6 +15,7 @@ var prng = std.rand.DefaultPrng.init(0); const Hash = struct { ty: type, name: []const u8, + has_iterative_api: bool = true, init_u8s: ?[]const u8 = null, init_u64: ?u64 = null, }; @@ -22,11 +23,62 @@ const Hash = struct { const siphash_key = "0123456789abcdef"; const hashes = [_]Hash{ - Hash{ .ty = hash.Wyhash, .name = "wyhash", .init_u64 = 0 }, - Hash{ .ty = hash.SipHash64(1, 3), .name = "siphash(1,3)", .init_u8s = siphash_key }, - Hash{ .ty = hash.SipHash64(2, 4), .name = "siphash(2,4)", .init_u8s = siphash_key }, - Hash{ .ty = hash.Fnv1a_64, .name = "fnv1a" }, - Hash{ .ty = hash.Crc32, .name = "crc32" }, + Hash{ + .ty = hash.Wyhash, + .name = "wyhash", + .init_u64 = 0, + }, + Hash{ + .ty = hash.SipHash64(1, 3), + .name = "siphash(1,3)", + .init_u8s = siphash_key, + }, + Hash{ + .ty = hash.SipHash64(2, 4), + .name = "siphash(2,4)", + .init_u8s = siphash_key, + 
}, + Hash{ + .ty = hash.Fnv1a_64, + .name = "fnv1a", + }, + Hash{ + .ty = hash.Adler32, + .name = "adler32", + }, + Hash{ + .ty = hash.crc.Crc32WithPoly(.IEEE), + .name = "crc32-slicing-by-8", + }, + Hash{ + .ty = hash.crc.Crc32SmallWithPoly(.IEEE), + .name = "crc32-half-byte-lookup", + }, + Hash{ + .ty = hash.CityHash32, + .name = "cityhash-32", + .has_iterative_api = false, + }, + Hash{ + .ty = hash.CityHash64, + .name = "cityhash-64", + .has_iterative_api = false, + }, + Hash{ + .ty = hash.Murmur2_32, + .name = "murmur2-32", + .has_iterative_api = false, + }, + Hash{ + .ty = hash.Murmur2_64, + .name = "murmur2-64", + .has_iterative_api = false, + }, + Hash{ + .ty = hash.Murmur3_32, + .name = "murmur3-32", + .has_iterative_api = false, + }, }; const Result = struct { @@ -78,10 +130,8 @@ pub fn benchmarkHashSmallKeys(comptime H: var, key_size: usize, bytes: usize) !R var sum: u64 = 0; while (i < key_count) : (i += 1) { - const o = i % (block_size - key_size); - const small_key = block[o .. key_size + o]; - - const result = blk: { + const small_key = block[0..key_size]; + sum +%= blk: { if (H.init_u8s) |init| { break :blk H.ty.hash(init, small_key); } @@ -90,8 +140,6 @@ pub fn benchmarkHashSmallKeys(comptime H: var, key_size: usize, bytes: usize) !R } break :blk H.ty.hash(small_key); }; - - sum +%= result; } const end = timer.read(); @@ -142,6 +190,7 @@ pub fn main() !void { var filter: ?[]u8 = ""; var count: usize = mode(128 * MiB); var key_size: usize = 32; + var seed: u32 = 0; var i: usize = 1; while (i < args.len) : (i += 1) { @@ -155,8 +204,8 @@ pub fn main() !void { std.os.exit(1); } - const seed = try std.fmt.parseUnsigned(u32, args[i], 10); - prng.seed(seed); + seed = try std.fmt.parseUnsigned(u32, args[i], 10); + // we seed later } else if (std.mem.eql(u8, args[i], "--filter")) { i += 1; if (i == args.len) { @@ -197,11 +246,18 @@ pub fn main() !void { inline for (hashes) |H| { if (filter == null or std.mem.indexOf(u8, H.name, filter.?) 
!= null) { - const result = try benchmarkHash(H, count); - const result_small = try benchmarkHashSmallKeys(H, key_size, count); - try stdout.print("{}\n", H.name); - try stdout.print(" iterative: {:4} MiB/s [{x:0<16}]\n", result.throughput / (1 * MiB), result.hash); + + // Always reseed prior to every call so we are hashing the same buffer contents. + // This allows easier comparison between different implementations. + if (H.has_iterative_api) { + prng.seed(seed); + const result = try benchmarkHash(H, count); + try stdout.print(" iterative: {:4} MiB/s [{x:0<16}]\n", result.throughput / (1 * MiB), result.hash); + } + + prng.seed(seed); + const result_small = try benchmarkHashSmallKeys(H, key_size, count); try stdout.print(" small keys: {:4} MiB/s [{x:0<16}]\n", result_small.throughput / (1 * MiB), result_small.hash); } } diff --git a/std/hash/crc.zig b/std/hash/crc.zig index 53b4262c93..73e5bb0371 100644 --- a/std/hash/crc.zig +++ b/std/hash/crc.zig @@ -9,17 +9,17 @@ const std = @import("../std.zig"); const debug = std.debug; const testing = std.testing; -pub const Polynomial = struct { - const IEEE = 0xedb88320; - const Castagnoli = 0x82f63b78; - const Koopman = 0xeb31d82e; +pub const Polynomial = enum(u32) { + IEEE = 0xedb88320, + Castagnoli = 0x82f63b78, + Koopman = 0xeb31d82e, }; // IEEE is by far the most common CRC and so is aliased by default. -pub const Crc32 = Crc32WithPoly(Polynomial.IEEE); +pub const Crc32 = Crc32WithPoly(.IEEE); // slicing-by-8 crc32 implementation. 
-pub fn Crc32WithPoly(comptime poly: u32) type { +pub fn Crc32WithPoly(comptime poly: Polynomial) type { return struct { const Self = @This(); const lookup_tables = comptime block: { @@ -31,7 +31,7 @@ pub fn Crc32WithPoly(comptime poly: u32) type { var j: usize = 0; while (j < 8) : (j += 1) { if (crc & 1 == 1) { - crc = (crc >> 1) ^ poly; + crc = (crc >> 1) ^ @enumToInt(poly); } else { crc = (crc >> 1); } @@ -100,7 +100,7 @@ pub fn Crc32WithPoly(comptime poly: u32) type { } test "crc32 ieee" { - const Crc32Ieee = Crc32WithPoly(Polynomial.IEEE); + const Crc32Ieee = Crc32WithPoly(.IEEE); testing.expect(Crc32Ieee.hash("") == 0x00000000); testing.expect(Crc32Ieee.hash("a") == 0xe8b7be43); @@ -108,7 +108,7 @@ test "crc32 ieee" { } test "crc32 castagnoli" { - const Crc32Castagnoli = Crc32WithPoly(Polynomial.Castagnoli); + const Crc32Castagnoli = Crc32WithPoly(.Castagnoli); testing.expect(Crc32Castagnoli.hash("") == 0x00000000); testing.expect(Crc32Castagnoli.hash("a") == 0xc1d04330); @@ -116,7 +116,7 @@ test "crc32 castagnoli" { } // half-byte lookup table implementation. 
-pub fn Crc32SmallWithPoly(comptime poly: u32) type { +pub fn Crc32SmallWithPoly(comptime poly: Polynomial) type { return struct { const Self = @This(); const lookup_table = comptime block: { @@ -127,7 +127,7 @@ pub fn Crc32SmallWithPoly(comptime poly: u32) type { var j: usize = 0; while (j < 8) : (j += 1) { if (crc & 1 == 1) { - crc = (crc >> 1) ^ poly; + crc = (crc >> 1) ^ @enumToInt(poly); } else { crc = (crc >> 1); } @@ -164,7 +164,7 @@ pub fn Crc32SmallWithPoly(comptime poly: u32) type { } test "small crc32 ieee" { - const Crc32Ieee = Crc32SmallWithPoly(Polynomial.IEEE); + const Crc32Ieee = Crc32SmallWithPoly(.IEEE); testing.expect(Crc32Ieee.hash("") == 0x00000000); testing.expect(Crc32Ieee.hash("a") == 0xe8b7be43); @@ -172,7 +172,7 @@ test "small crc32 ieee" { } test "small crc32 castagnoli" { - const Crc32Castagnoli = Crc32SmallWithPoly(Polynomial.Castagnoli); + const Crc32Castagnoli = Crc32SmallWithPoly(.Castagnoli); testing.expect(Crc32Castagnoli.hash("") == 0x00000000); testing.expect(Crc32Castagnoli.hash("a") == 0xc1d04330); From 7854a52a6b8ba541e302c5466f61e0970e5d6062 Mon Sep 17 00:00:00 2001 From: Marc Tiehuis Date: Wed, 21 Aug 2019 21:02:24 +1200 Subject: [PATCH 3/4] Add iterative-only filter to hash benchmark --- std/hash/benchmark.zig | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/std/hash/benchmark.zig b/std/hash/benchmark.zig index ca948d21bf..f09d431fce 100644 --- a/std/hash/benchmark.zig +++ b/std/hash/benchmark.zig @@ -157,9 +157,11 @@ fn usage() void { \\throughput_test [options] \\ \\Options: - \\ --filter [test-name] - \\ --seed [int] - \\ --count [int] + \\ --filter [test-name] + \\ --seed [int] + \\ --count [int] + \\ --key-size [int] + \\ --iterative-only \\ --help \\ ); @@ -191,6 +193,7 @@ pub fn main() !void { var count: usize = mode(128 * MiB); var key_size: usize = 32; var seed: u32 = 0; + var test_iterative_only = false; var i: usize = 1; while (i < args.len) : (i += 1) { @@ 
-235,6 +238,8 @@ pub fn main() !void { try stdout.print("key_size cannot exceed block size of {}\n", block_size); std.os.exit(1); } + } else if (std.mem.eql(u8, args[i], "--iterative-only")) { + test_iterative_only = true; } else if (std.mem.eql(u8, args[i], "--help")) { usage(); return; @@ -246,19 +251,23 @@ pub fn main() !void { inline for (hashes) |H| { if (filter == null or std.mem.indexOf(u8, H.name, filter.?) != null) { - try stdout.print("{}\n", H.name); + if (!test_iterative_only or H.has_iterative_api) { + try stdout.print("{}\n", H.name); - // Always reseed prior to every call so we are hashing the same buffer contents. - // This allows easier comparison between different implementations. - if (H.has_iterative_api) { - prng.seed(seed); - const result = try benchmarkHash(H, count); - try stdout.print(" iterative: {:4} MiB/s [{x:0<16}]\n", result.throughput / (1 * MiB), result.hash); + // Always reseed prior to every call so we are hashing the same buffer contents. + // This allows easier comparison between different implementations. + if (H.has_iterative_api) { + prng.seed(seed); + const result = try benchmarkHash(H, count); + try stdout.print(" iterative: {:4} MiB/s [{x:0<16}]\n", result.throughput / (1 * MiB), result.hash); + } + + if (!test_iterative_only) { + prng.seed(seed); + const result_small = try benchmarkHashSmallKeys(H, key_size, count); + try stdout.print(" small keys: {:4} MiB/s [{x:0<16}]\n", result_small.throughput / (1 * MiB), result_small.hash); + } } - - prng.seed(seed); - const result_small = try benchmarkHashSmallKeys(H, key_size, count); - try stdout.print(" small keys: {:4} MiB/s [{x:0<16}]\n", result_small.throughput / (1 * MiB), result_small.hash); } } } From 16fa255f48ae2d290bc26ceb41489f2c3e21b96d Mon Sep 17 00:00:00 2001 From: Marc Tiehuis Date: Wed, 21 Aug 2019 21:34:42 +1200 Subject: [PATCH 4/4] Inline full slice hashing This gives moderate speed improvements when hashing small keys. 
The crc/adler/fnv inlining did not provide enough speedup to warrant the change. OLD: wyhash small keys: 2277 MiB/s [c14617a1e3800000] siphash(1,3) small keys: 937 MiB/s [b2919222ed400000] siphash(2,4) small keys: 722 MiB/s [3c3d974cc2800000] fnv1a small keys: 1580 MiB/s [70155e1cb7000000] adler32 small keys: 1898 MiB/s [00013883ef800000] crc32-slicing-by-8 small keys: 2323 MiB/s [0035bf3dcac00000] crc32-half-byte-lookup small keys: 218 MiB/s [0035bf3dcac00000] NEW: wyhash small keys: 2775 MiB/s [c14617a1e3800000] siphash(1,3) small keys: 1086 MiB/s [b2919222ed400000] siphash(2,4) small keys: 789 MiB/s [3c3d974cc2800000] fnv1a small keys: 1604 MiB/s [70155e1cb7000000] adler32 small keys: 1856 MiB/s [00013883ef800000] crc32-slicing-by-8 small keys: 2336 MiB/s [0035bf3dcac00000] crc32-half-byte-lookup small keys: 218 MiB/s [0035bf3dcac00000] --- std/hash/siphash.zig | 4 ++-- std/hash/wyhash.zig | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/std/hash/siphash.zig b/std/hash/siphash.zig index 8e83d67897..aa38c61863 100644 --- a/std/hash/siphash.zig +++ b/std/hash/siphash.zig @@ -152,8 +152,8 @@ fn SipHash(comptime T: type, comptime c_rounds: usize, comptime d_rounds: usize) pub fn hash(key: []const u8, input: []const u8) T { var c = Self.init(key); - c.update(input); - return c.final(); + @inlineCall(c.update, input); + return @inlineCall(c.final); } }; } diff --git a/std/hash/wyhash.zig b/std/hash/wyhash.zig index dfa5156cad..f5cca121f4 100644 --- a/std/hash/wyhash.zig +++ b/std/hash/wyhash.zig @@ -116,8 +116,8 @@ pub const Wyhash = struct { pub fn hash(seed: u64, input: []const u8) u64 { var c = Wyhash.init(seed); - c.update(input); - return c.final(); + @inlineCall(c.update, input); + return @inlineCall(c.final); } };