zig/lib/compiler_rt/count0bits.zig
Andrew Kelley ec95e00e28 flatten lib/std/special and improve "pkg inside another" logic
stage2: change logic for detecting whether the main package is inside
the std package. Previously it relied on realpath() which is not portable.
This uses resolve() which is how imports already work.

 * stage2: fix cleanup bug when creating Module
 * flatten lib/std/special/* to lib/*
   - this was motivated by making main_pkg_is_inside_std false for
     compiler_rt & friends.
 * rename "mini libc" to "universal libc"
2022-05-06 22:41:00 -07:00

245 lines
6.4 KiB
Zig

const std = @import("std");
const builtin = @import("builtin");
// clz - count leading zeroes
// - clzXi2 for unoptimized little and big endian
// - __clzsi2_thumb1: assume a != 0
// - __clzsi2_arm32: assume a != 0
// ctz - count trailing zeroes
// - ctzXi2 for unoptimized little and big endian
// ffs - find first set
// * ffs = (a == 0) => 0, (a != 0) => ctz + 1
// * dont pay for `if (x == 0) return shift;` inside ctz
// - ffsXi2 for unoptimized little and big endian
inline fn clzXi2(comptime T: type, a: T) i32 {
@setRuntimeSafety(builtin.is_test);
var x = switch (@bitSizeOf(T)) {
32 => @bitCast(u32, a),
64 => @bitCast(u64, a),
128 => @bitCast(u128, a),
else => unreachable,
};
var n: T = @bitSizeOf(T);
// Count first bit set using binary search, from Hacker's Delight
var y: @TypeOf(x) = 0;
comptime var shift: u8 = @bitSizeOf(T);
inline while (shift > 0) {
shift = shift >> 1;
y = x >> shift;
if (y != 0) {
n = n - shift;
x = y;
}
}
return @intCast(i32, n - @bitCast(T, x));
}
fn __clzsi2_thumb1() callconv(.Naked) void {
@setRuntimeSafety(false);
// Similar to the generic version with the last two rounds replaced by a LUT
asm volatile (
\\ movs r1, #32
\\ lsrs r2, r0, #16
\\ beq 1f
\\ subs r1, #16
\\ movs r0, r2
\\ 1:
\\ lsrs r2, r0, #8
\\ beq 1f
\\ subs r1, #8
\\ movs r0, r2
\\ 1:
\\ lsrs r2, r0, #4
\\ beq 1f
\\ subs r1, #4
\\ movs r0, r2
\\ 1:
\\ ldr r3, =LUT
\\ ldrb r0, [r3, r0]
\\ subs r0, r1, r0
\\ bx lr
\\ .p2align 2
\\ // Number of bits set in the 0-15 range
\\ LUT:
\\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4
);
unreachable;
}
fn __clzsi2_arm32() callconv(.Naked) void {
@setRuntimeSafety(false);
asm volatile (
\\ // Assumption: n != 0
\\ // r0: n
\\ // r1: count of leading zeros in n + 1
\\ // r2: scratch register for shifted r0
\\ mov r1, #1
\\
\\ // Basic block:
\\ // if ((r0 >> SHIFT) == 0)
\\ // r1 += SHIFT;
\\ // else
\\ // r0 >>= SHIFT;
\\ // for descending powers of two as SHIFT.
\\ lsrs r2, r0, #16
\\ movne r0, r2
\\ addeq r1, #16
\\
\\ lsrs r2, r0, #8
\\ movne r0, r2
\\ addeq r1, #8
\\
\\ lsrs r2, r0, #4
\\ movne r0, r2
\\ addeq r1, #4
\\
\\ lsrs r2, r0, #2
\\ movne r0, r2
\\ addeq r1, #2
\\
\\ // The basic block invariants at this point are (r0 >> 2) == 0 and
\\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
\\ //
\\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)f
\\ // ---+----------------+----------------+------------+--------------
\\ // 1 | 1 | 0 | 0 | 1
\\ // 2 | 0 | 1 | -1 | 0
\\ // 3 | 0 | 1 | -1 | 0
\\ //
\\ // The r1's initial value of 1 compensates for the 1 here.
\\ sub r0, r1, r0, lsr #1
\\ bx lr
);
unreachable;
}
fn clzsi2_generic(a: i32) callconv(.C) i32 {
return clzXi2(i32, a);
}
pub const __clzsi2 = switch (builtin.cpu.arch) {
.arm, .armeb, .thumb, .thumbeb => impl: {
const use_thumb1 =
(builtin.cpu.arch.isThumb() or
std.Target.arm.featureSetHas(builtin.cpu.features, .noarm)) and
!std.Target.arm.featureSetHas(builtin.cpu.features, .thumb2);
if (use_thumb1) {
break :impl __clzsi2_thumb1;
}
// From here on we're either targeting Thumb2 or ARM.
else if (!builtin.cpu.arch.isThumb()) {
break :impl __clzsi2_arm32;
}
// Use the generic implementation otherwise.
else break :impl clzsi2_generic;
},
else => clzsi2_generic,
};
pub fn __clzdi2(a: i64) callconv(.C) i32 {
return clzXi2(i64, a);
}
pub fn __clzti2(a: i128) callconv(.C) i32 {
return clzXi2(i128, a);
}
inline fn ctzXi2(comptime T: type, a: T) i32 {
@setRuntimeSafety(builtin.is_test);
var x = switch (@bitSizeOf(T)) {
32 => @bitCast(u32, a),
64 => @bitCast(u64, a),
128 => @bitCast(u128, a),
else => unreachable,
};
var n: T = 1;
// Number of trailing zeroes as binary search, from Hacker's Delight
var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
comptime var shift = @bitSizeOf(T);
if (x == 0) return shift;
inline while (shift > 1) {
shift = shift >> 1;
mask = mask >> shift;
if ((x & mask) == 0) {
n = n + shift;
x = x >> shift;
}
}
return @intCast(i32, n - @bitCast(T, (x & 1)));
}
pub fn __ctzsi2(a: i32) callconv(.C) i32 {
return ctzXi2(i32, a);
}
pub fn __ctzdi2(a: i64) callconv(.C) i32 {
return ctzXi2(i64, a);
}
pub fn __ctzti2(a: i128) callconv(.C) i32 {
return ctzXi2(i128, a);
}
inline fn ffsXi2(comptime T: type, a: T) i32 {
@setRuntimeSafety(builtin.is_test);
var x = switch (@bitSizeOf(T)) {
32 => @bitCast(u32, a),
64 => @bitCast(u64, a),
128 => @bitCast(u128, a),
else => unreachable,
};
var n: T = 1;
// adapted from Number of trailing zeroes (see ctzXi2)
var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
comptime var shift = @bitSizeOf(T);
// In contrast to ctz return 0
if (x == 0) return 0;
inline while (shift > 1) {
shift = shift >> 1;
mask = mask >> shift;
if ((x & mask) == 0) {
n = n + shift;
x = x >> shift;
}
}
// return ctz + 1
return @intCast(i32, n - @bitCast(T, (x & 1))) + @as(i32, 1);
}
pub fn __ffssi2(a: i32) callconv(.C) i32 {
return ffsXi2(i32, a);
}
pub fn __ffsdi2(a: i64) callconv(.C) i32 {
return ffsXi2(i64, a);
}
pub fn __ffsti2(a: i128) callconv(.C) i32 {
return ffsXi2(i128, a);
}
test {
_ = @import("clzsi2_test.zig");
_ = @import("clzdi2_test.zig");
_ = @import("clzti2_test.zig");
_ = @import("ctzsi2_test.zig");
_ = @import("ctzdi2_test.zig");
_ = @import("ctzti2_test.zig");
_ = @import("ffssi2_test.zig");
_ = @import("ffsdi2_test.zig");
_ = @import("ffsti2_test.zig");
}