mirror of
https://github.com/ziglang/zig.git
synced 2025-12-11 00:33:08 +00:00
- apply simpler approach than LLVM for __popcountdi2
taken from The Art of Computer Programming and generalized
- rename popcountdi2.zig to popcount.zig
- test cases derived from popcountdi2_test.zig
- tests: compare naive approach 10_000 times with
random numbers created from naive seed 42
See #1290
67 lines
2.5 KiB
Zig
67 lines
2.5 KiB
Zig
const builtin = @import("builtin");
|
|
const std = @import("std");
|
|
|
|
// popcount - population count
// counts the number of 1 bits

// SWAR-Popcount: count the bits of each duo, aggregate the duos to nibbles
// and the nibbles to bytes, all in parallel inside one x-bit register, then
// sum up all bytes.
// The SWAR masks and factors can be defined as 2-adic fractions.
// TAOCP: Combinatorial Algorithms, Bitwise Tricks and Techniques,
// subsubsection "Working with the rightmost bits" and "Sideways addition".
/// Builds the C-ABI population-count routine for the integer type `T`.
/// `T` must be 32, 64 or 128 bits wide. The returned function takes a value
/// of type `T` and returns the number of set bits in it as an `i32`.
fn popcountXi2_generic(comptime T: type) fn (a: T) callconv(.C) i32 {
    return struct {
        // Unsigned integer of the same width as T; the input is reinterpreted
        // as this type before any shifting or masking is done.
        const U = switch (@bitSizeOf(T)) {
            32 => u32,
            64 => u64,
            128 => u128,
            else => unreachable,
        };

        // Every mask is the all-ones value divided by a small constant,
        // which yields the repeating byte pattern noted beside it.
        const ones: U = ~@as(U, 0);
        const duo_mask: U = ones / 3; // 0x5555...: low bit of every duo
        const nibble_mask: U = ones / 5; // 0x3333...: low duo of every nibble
        const byte_mask: U = ones / 17; // 0x0f0f...: low nibble of every byte
        const byte_ones: U = ones / 255; // 0x0101...: a 1 in every byte

        fn f(a: T) callconv(.C) i32 {
            @setRuntimeSafety(builtin.is_test);

            const bits = @bitCast(U, a);
            // Each duo now holds the number of set bits it contained.
            const duos = bits - ((bits >> 1) & duo_mask);
            // Add neighbouring duos so each nibble holds its own bit count.
            const nibbles = (duos & nibble_mask) + ((duos >> 2) & nibble_mask);
            // Add neighbouring nibbles so each byte holds its own bit count.
            const per_byte = (nibbles + (nibbles >> 4)) & byte_mask;
            // The wrapping multiply accumulates all byte counts into the most
            // significant byte; the shift moves that byte down to bit 0.
            const total = (per_byte *% byte_ones) >> (@bitSizeOf(T) - 8);
            return @intCast(i32, total);
        }
    }.f;
}
|
|
|
|
/// Population count for `i32` arguments, instantiated from the generic routine.
pub const __popcountsi2 = popcountXi2_generic(i32);

/// Population count for `i64` arguments, instantiated from the generic routine.
pub const __popcountdi2 = popcountXi2_generic(i64);

/// Population count for `i128` arguments, instantiated from the generic routine.
pub const __popcountti2 = popcountXi2_generic(i128);
|
|
|
|
// Reference the per-width test files so they are included when this file is
// built in test mode.
test {
    _ = @import("popcountsi2_test.zig");
    _ = @import("popcountdi2_test.zig");
    _ = @import("popcountti2_test.zig");
}
|