diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f4cecd8f6..16744eba51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -498,7 +498,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/multi3.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/negXf2.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/os_version_check.zig" - "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/popcountdi2.zig" + "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/popcount.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/shift.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/stack_probe.zig" "${CMAKE_SOURCE_DIR}/lib/std/special/compiler_rt/truncXfYf2.zig" diff --git a/lib/std/special/compiler_rt.zig b/lib/std/special/compiler_rt.zig index ee32e49282..e3d2c5b7e5 100644 --- a/lib/std/special/compiler_rt.zig +++ b/lib/std/special/compiler_rt.zig @@ -177,7 +177,6 @@ comptime { .linkage = linkage, }); }, - else => {}, } @@ -334,8 +333,12 @@ comptime { const __udivmoddi4 = @import("compiler_rt/int.zig").__udivmoddi4; @export(__udivmoddi4, .{ .name = "__udivmoddi4", .linkage = linkage }); - const __popcountdi2 = @import("compiler_rt/popcountdi2.zig").__popcountdi2; + const __popcountsi2 = @import("compiler_rt/popcount.zig").__popcountsi2; + @export(__popcountsi2, .{ .name = "__popcountsi2", .linkage = linkage }); + const __popcountdi2 = @import("compiler_rt/popcount.zig").__popcountdi2; @export(__popcountdi2, .{ .name = "__popcountdi2", .linkage = linkage }); + const __popcountti2 = @import("compiler_rt/popcount.zig").__popcountti2; + @export(__popcountti2, .{ .name = "__popcountti2", .linkage = linkage }); if (is_darwin) { const __isPlatformVersionAtLeast = @import("compiler_rt/os_version_check.zig").__isPlatformVersionAtLeast; diff --git a/lib/std/special/compiler_rt/popcount.zig b/lib/std/special/compiler_rt/popcount.zig new file mode 100644 index 0000000000..2d4adec8c3 --- /dev/null +++ b/lib/std/special/compiler_rt/popcount.zig @@ -0,0 +1,66 @@ +const builtin = @import("builtin"); +const std = @import("std"); + +// popcount - population count +// counts the number of 1 bits + +// SWAR-Popcount: count bits of duos, aggregate to nibbles, and bytes inside +// x-bit register in parallel to sum up all bytes +// SWAR-Masks and factors can be defined as 2-adic fractions +// TAOCP: Combinational Algorithms, Bitwise Tricks And Techniques, +// subsubsection "Working with the rightmost bits" and "Sideways addition". +fn popcountXi2_generic(comptime T: type) fn (a: T) callconv(.C) i32 { + return struct { + fn f(a: T) callconv(.C) i32 { + @setRuntimeSafety(builtin.is_test); + + var x = switch (@bitSizeOf(T)) { + 32 => @bitCast(u32, a), + 64 => @bitCast(u64, a), + 128 => @bitCast(u128, a), + else => unreachable, + }; + const k1 = switch (@bitSizeOf(T)) { // -1/3 + 32 => @as(u32, 0x55555555), + 64 => @as(u64, 0x55555555_55555555), + 128 => @as(u128, 0x55555555_55555555_55555555_55555555), + else => unreachable, + }; + const k2 = switch (@bitSizeOf(T)) { // -1/5 + 32 => @as(u32, 0x33333333), + 64 => @as(u64, 0x33333333_33333333), + 128 => @as(u128, 0x33333333_33333333_33333333_33333333), + else => unreachable, + }; + const k4 = switch (@bitSizeOf(T)) { // -1/17 + 32 => @as(u32, 0x0f0f0f0f), + 64 => @as(u64, 0x0f0f0f0f_0f0f0f0f), + 128 => @as(u128, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f), + else => unreachable, + }; + const kf = switch (@bitSizeOf(T)) { // -1/255 + 32 => @as(u32, 0x01010101), + 64 => @as(u64, 0x01010101_01010101), + 128 => @as(u128, 0x01010101_01010101_01010101_01010101), + else => unreachable, + }; + x = x - ((x >> 1) & k1); // aggregate duos + x = (x & k2) + ((x >> 2) & k2); // aggregate nibbles + x = (x + (x >> 4)) & k4; // aggregate bytes + x = (x *% kf) >> @bitSizeOf(T) - 8; // 8 most significant bits of x + (x<<8) + (x<<16) + .. + return @intCast(i32, x); + } + }.f; +} + +pub const __popcountsi2 = popcountXi2_generic(i32); + +pub const __popcountdi2 = popcountXi2_generic(i64); + +pub const __popcountti2 = popcountXi2_generic(i128); + +test { + _ = @import("popcountsi2_test.zig"); + _ = @import("popcountdi2_test.zig"); + _ = @import("popcountti2_test.zig"); +} diff --git a/lib/std/special/compiler_rt/popcountdi2.zig b/lib/std/special/compiler_rt/popcountdi2.zig deleted file mode 100644 index 9835e60fbc..0000000000 --- a/lib/std/special/compiler_rt/popcountdi2.zig +++ /dev/null @@ -1,24 +0,0 @@ -const builtin = @import("builtin"); -const compiler_rt = @import("../compiler_rt.zig"); - -// ported from llvm compiler-rt 8.0.0rc3 95e1c294cb0415a377a7b1d6c7c7d4f89e1c04e4 -pub fn __popcountdi2(a: i64) callconv(.C) i32 { - var x2 = @bitCast(u64, a); - x2 = x2 - ((x2 >> 1) & 0x5555555555555555); - // Every 2 bits holds the sum of every pair of bits (32) - x2 = ((x2 >> 2) & 0x3333333333333333) + (x2 & 0x3333333333333333); - // Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) - x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0F; - // Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) - var x: u32 = @truncate(u32, x2 + (x2 >> 32)); - // The lower 32 bits hold four 16 bit sums (5 significant bits). - // Upper 32 bits are garbage */ - x = x + (x >> 16); - // The lower 16 bits hold two 32 bit sums (6 significant bits). - // Upper 16 bits are garbage */ - return @bitCast(i32, (x + (x >> 8)) & 0x0000007F); // (7 significant bits) -} - -test { - _ = @import("popcountdi2_test.zig"); -} diff --git a/lib/std/special/compiler_rt/popcountdi2_test.zig b/lib/std/special/compiler_rt/popcountdi2_test.zig index 736d04dac1..e20693987a 100644 --- a/lib/std/special/compiler_rt/popcountdi2_test.zig +++ b/lib/std/special/compiler_rt/popcountdi2_test.zig @@ -1,27 +1,34 @@ -const __popcountdi2 = @import("popcountdi2.zig").__popcountdi2; +const popcount = @import("popcount.zig"); const testing = @import("std").testing; -fn naive_popcount(a_param: i64) i32 { - var a = a_param; +fn popcountdi2Naive(a: i64) i32 { + var x = a; var r: i32 = 0; - while (a != 0) : (a = @bitCast(i64, @bitCast(u64, a) >> 1)) { - r += @intCast(i32, a & 1); + while (x != 0) : (x = @bitCast(i64, @bitCast(u64, x) >> 1)) { + r += @intCast(i32, x & 1); } return r; } fn test__popcountdi2(a: i64) !void { - const x = __popcountdi2(a); - const expected = naive_popcount(a); - try testing.expect(expected == x); + const x = popcount.__popcountdi2(a); + const expected = popcountdi2Naive(a); + try testing.expectEqual(expected, x); } test "popcountdi2" { try test__popcountdi2(0); try test__popcountdi2(1); try test__popcountdi2(2); - try test__popcountdi2(@bitCast(i64, @as(u64, 0xFFFFFFFFFFFFFFFD))); - try test__popcountdi2(@bitCast(i64, @as(u64, 0xFFFFFFFFFFFFFFFE))); - try test__popcountdi2(@bitCast(i64, @as(u64, 0xFFFFFFFFFFFFFFFF))); - // TODO some fuzz testing + try test__popcountdi2(@bitCast(i64, @as(u64, 0xffffffff_fffffffd))); + try test__popcountdi2(@bitCast(i64, @as(u64, 0xffffffff_fffffffe))); + try test__popcountdi2(@bitCast(i64, @as(u64, 0xffffffff_ffffffff))); + + const RndGen = @import("std").rand.DefaultPrng; + var rnd = RndGen.init(42); + var i: u32 = 0; + while (i < 10_000) : (i += 1) { + var rand_num = rnd.random().int(i64); + try test__popcountdi2(rand_num); + } } diff --git a/lib/std/special/compiler_rt/popcountsi2_test.zig b/lib/std/special/compiler_rt/popcountsi2_test.zig new file mode 100644 index 0000000000..c0c92e396e --- /dev/null +++ b/lib/std/special/compiler_rt/popcountsi2_test.zig @@ -0,0 +1,34 @@ +const popcount = @import("popcount.zig"); +const testing = @import("std").testing; + +fn popcountsi2Naive(a: i32) i32 { + var x = a; + var r: i32 = 0; + while (x != 0) : (x = @bitCast(i32, @bitCast(u32, x) >> 1)) { + r += @intCast(i32, x & 1); + } + return r; +} + +fn test__popcountsi2(a: i32) !void { + const x = popcount.__popcountsi2(a); + const expected = popcountsi2Naive(a); + try testing.expectEqual(expected, x); +} + +test "popcountsi2" { + try test__popcountsi2(0); + try test__popcountsi2(1); + try test__popcountsi2(2); + try test__popcountsi2(@bitCast(i32, @as(u32, 0xfffffffd))); + try test__popcountsi2(@bitCast(i32, @as(u32, 0xfffffffe))); + try test__popcountsi2(@bitCast(i32, @as(u32, 0xffffffff))); + + const RndGen = @import("std").rand.DefaultPrng; + var rnd = RndGen.init(42); + var i: u32 = 0; + while (i < 10_000) : (i += 1) { + var rand_num = rnd.random().int(i32); + try test__popcountsi2(rand_num); + } +} diff --git a/lib/std/special/compiler_rt/popcountti2_test.zig b/lib/std/special/compiler_rt/popcountti2_test.zig new file mode 100644 index 0000000000..83f2a18e7d --- /dev/null +++ b/lib/std/special/compiler_rt/popcountti2_test.zig @@ -0,0 +1,34 @@ +const popcount = @import("popcount.zig"); +const testing = @import("std").testing; + +fn popcountti2Naive(a: i128) i32 { + var x = a; + var r: i32 = 0; + while (x != 0) : (x = @bitCast(i128, @bitCast(u128, x) >> 1)) { + r += @intCast(i32, x & 1); + } + return r; +} + +fn test__popcountti2(a: i128) !void { + const x = popcount.__popcountti2(a); + const expected = popcountti2Naive(a); + try testing.expectEqual(expected, x); +} + +test "popcountti2" { + try test__popcountti2(0); + try test__popcountti2(1); + try test__popcountti2(2); + try test__popcountti2(@bitCast(i128, @as(u128, 0xffffffff_ffffffff_ffffffff_fffffffd))); + try test__popcountti2(@bitCast(i128, @as(u128, 0xffffffff_ffffffff_ffffffff_fffffffe))); + try test__popcountti2(@bitCast(i128, @as(u128, 0xffffffff_ffffffff_ffffffff_ffffffff))); + + const RndGen = @import("std").rand.DefaultPrng; + var rnd = RndGen.init(42); + var i: u32 = 0; + while (i < 10_000) : (i += 1) { + var rand_num = rnd.random().int(i128); + try test__popcountti2(rand_num); + } +}