CBE: split {clz,ctz,mod,popCount,byteSwap,bitReverse} by type

This also surfaces the fact that clz, ctz and popCount didn't actually support 128-bit integers, despite what was claimed by 226fcd7c709ec664c5d883042cf7beb3026f66cb. This was partially hidden because the test code for popCount only exercised 128-bit integers in a comptime context. This commit duplicates that test case for runtime ints too.
2026-01-20 22:35:24 +00:00 · 2022-03-14 14:10:52 +00:00 · 2022-03-14 14:10:52 +00:00 · 8643591c9a
commit 8643591c9a
parent d912699e08
4 changed files with 245 additions and 101 deletions
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@ -1709,11 +1709,11 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
            .memcpy           => try airMemcpy(f, inst),
            .set_union_tag    => try airSetUnionTag(f, inst),
            .get_union_tag    => try airGetUnionTag(f, inst),
-            .clz              => try airBuiltinCall(f, inst, "clz", .{}),
-            .ctz              => try airBuiltinCall(f, inst, "ctz", .{}),
-            .popcount         => try airBuiltinCall(f, inst, "popcount", .{}),
-            .byte_swap        => try airBuiltinCall(f, inst, "byte_swap", .{ .needs_signedness_info = true }),
-            .bit_reverse      => try airBuiltinCall(f, inst, "bit_reverse", .{ .needs_signedness_info = true }),
+            .clz              => try airBuiltinCall(f, inst, "clz"),
+            .ctz              => try airBuiltinCall(f, inst, "ctz"),
+            .popcount         => try airBuiltinCall(f, inst, "popcount"),
+            .byte_swap        => try airBuiltinCall(f, inst, "byte_swap"),
+            .bit_reverse      => try airBuiltinCall(f, inst, "bit_reverse"),
            .tag_name         => try airTagName(f, inst),
            .error_name       => try airErrorName(f, inst),
            .splat            => try airSplat(f, inst),
@ -3351,7 +3351,7 @@ fn airPtrToInt(f: *Function, inst: Air.Inst.Index) !CValue {
    return local;
 }

-fn airBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u8, options: struct { needs_signedness_info: bool = false }) !CValue {
+fn airBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u8) !CValue {
    if (f.liveness.isUnused(inst)) return CValue.none;

    const inst_ty = f.air.typeOfIndex(inst);
@ -3362,20 +3362,17 @@ fn airBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u8, op
    const writer = f.object.writer();

    const int_info = operand_ty.intInfo(target);
-    _ = toCIntBits(int_info.bits) orelse
+    const c_bits = toCIntBits(int_info.bits) orelse
        return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{});

-    try writer.print(" = zig_{s}(", .{fn_name});
+    try writer.print(" = zig_{s}_", .{fn_name});
+    const prefix_byte: u8 = switch (int_info.signedness) {
+        .signed => 'i',
+        .unsigned => 'u',
+    };
+    try writer.print("{c}{d}(", .{ prefix_byte, c_bits });
    try f.writeCValue(writer, try f.resolveInst(operand));
-    try writer.print(", {d}", .{int_info.bits});
-    if (options.needs_signedness_info) {
-        const signed_type = switch (int_info.signedness) {
-            .signed => "true",
-            .unsigned => "false",
-        };
-        try writer.print(", {s}", .{signed_type});
-    }
-    try writer.writeAll(");\n");
+    try writer.print(", {d});\n", .{int_info.bits});
    return local;
 }

--- a/src/link/C/zig.h
+++ b/src/link/C/zig.h
@ -503,104 +503,248 @@ zig_shl_sat_s(long,      long, ((sizeof(long    )) * CHAR_BIT - 1))

 #define zig_bitsizeof(T) (CHAR_BIT * sizeof(T))
 #define zig_bit_mask(T, bit_width) \
-    ((bit_width) == 128 \
-     ? ((T)(((uint128_t) 0xffffffffffffffff) << 64 | \
-             (uint128_t) 0xffffffffffffffff)) \
+    ((bit_width) == zig_bitsizeof(T) \
+     ? ((T)-1) \
     : (((T)1 << (T)(bit_width)) - 1))

-static inline uint128_t zig_sign_extend(uint128_t value, uint128_t zig_type_bit_width) {
-    const uint128_t m = (uint128_t)1 << (zig_type_bit_width - 1);
-    return (value ^ m) - m;
-}
-
-static inline int zig_clz(unsigned long long value, uint8_t zig_type_bit_width) {
+static inline int zig_clz(unsigned int value, uint8_t zig_type_bit_width) {
    if (value == 0) return zig_type_bit_width;
-    if (zig_type_bit_width <= zig_bitsizeof(unsigned int))
-        return (__builtin_clz(value) - zig_bitsizeof(unsigned int) + zig_type_bit_width);
-    if (zig_type_bit_width <= zig_bitsizeof(unsigned long))
-        return (__builtin_clzl(value) - zig_bitsizeof(unsigned long) + zig_type_bit_width);
-    return (__builtin_clzll(value) - zig_bitsizeof(unsigned long long) + zig_type_bit_width);
+    return __builtin_clz(value) - zig_bitsizeof(unsigned int) + zig_type_bit_width;
 }

-static inline int zig_ctz(unsigned long long value, uint8_t zig_type_bit_width) {
+static inline int zig_clzl(unsigned long value, uint8_t zig_type_bit_width) {
    if (value == 0) return zig_type_bit_width;
-    if (zig_type_bit_width <= zig_bitsizeof(unsigned int)) return __builtin_ctz(value);
-    if (zig_type_bit_width <= zig_bitsizeof(unsigned long)) return __builtin_ctzl(value);
-    return __builtin_ctzll(value);
+    return __builtin_clzl(value) - zig_bitsizeof(unsigned long) + zig_type_bit_width;
 }

-static inline int zig_popcount(unsigned long long value, uint8_t zig_type_bit_width) {
-    const unsigned long long mask = zig_bit_mask(unsigned long long, zig_type_bit_width);
-    if (zig_type_bit_width <= zig_bitsizeof(unsigned int))
-        return __builtin_popcount(value & mask);
-    if (zig_type_bit_width <= zig_bitsizeof(unsigned long))
-        return __builtin_popcountl(value & mask);
-    return __builtin_popcountll(value & mask);
+static inline int zig_clzll(unsigned long long value, uint8_t zig_type_bit_width) {
+    if (value == 0) return zig_type_bit_width;
+    return __builtin_clzll(value) - zig_bitsizeof(unsigned long long) + zig_type_bit_width;
 }

-static inline uint128_t zig_byte_swap(uint128_t value, uint8_t zig_type_bit_width, bool signed_type) {
-    if (zig_type_bit_width <= 8)  return value;
-    if (zig_type_bit_width <= 16) return __builtin_bswap16(value);
-    if (zig_type_bit_width <= 32) {
-        const uint32_t swapped = __builtin_bswap32(value) >> (32 - zig_type_bit_width);
-        return (signed_type ? zig_sign_extend(swapped, zig_type_bit_width) : swapped);
+#define zig_clz_u8  zig_clz
+#define zig_clz_i8  zig_clz
+#define zig_clz_u16 zig_clz
+#define zig_clz_i16 zig_clz
+#define zig_clz_u32 zig_clzl
+#define zig_clz_i32 zig_clzl
+#define zig_clz_u64 zig_clzll
+#define zig_clz_i64 zig_clzll
+
+static inline int zig_clz_u128(uint128_t value, uint8_t zig_type_bit_width) {
+    if (value == 0) return zig_type_bit_width;
+    const uint128_t mask = zig_bit_mask(uint128_t, zig_type_bit_width);
+    const uint64_t hi = (value & mask) >> 64;
+    const uint64_t lo = (value & mask);
+    const int leading_zeroes = (
+        hi != 0 ? __builtin_clzll(hi) : 64 + (lo != 0 ? __builtin_clzll(lo) : 64));
+    return leading_zeroes - zig_bitsizeof(uint128_t) + zig_type_bit_width;
+}
+
+#define zig_clz_i128 zig_clz_u128
+
+static inline int zig_ctz(unsigned int value, uint8_t zig_type_bit_width) {
+    if (value == 0) return zig_type_bit_width;
+    return __builtin_ctz(value & zig_bit_mask(unsigned int, zig_type_bit_width));
+}
+
+static inline int zig_ctzl(unsigned long value, uint8_t zig_type_bit_width) {
+    if (value == 0) return zig_type_bit_width;
+    return __builtin_ctzl(value & zig_bit_mask(unsigned long, zig_type_bit_width));
+}
+
+/* Count trailing zero bits of `value`, looking only at its low
+ * `zig_type_bit_width` bits. Returns `zig_type_bit_width` when `value` is 0.
+ * The operand and mask are `unsigned long long` (matching __builtin_ctzll) so
+ * that 64-bit operands are not truncated on targets where `unsigned long` is
+ * only 32 bits wide. */
+static inline int zig_ctzll(unsigned long long value, uint8_t zig_type_bit_width) {
+    if (value == 0) return zig_type_bit_width;
+    return __builtin_ctzll(value & zig_bit_mask(unsigned long long, zig_type_bit_width));
+}
+
+#define zig_ctz_u8  zig_ctz
+#define zig_ctz_i8  zig_ctz
+#define zig_ctz_u16 zig_ctz
+#define zig_ctz_i16 zig_ctz
+#define zig_ctz_u32 zig_ctzl
+#define zig_ctz_i32 zig_ctzl
+#define zig_ctz_u64 zig_ctzll
+#define zig_ctz_i64 zig_ctzll
+
+/* Count trailing zero bits of a 128-bit `value`, looking only at its low
+ * `zig_type_bit_width` bits. Returns `zig_type_bit_width` when those bits are
+ * all zero, consistent with zig_ctz/zig_ctzl/zig_ctzll (previously a zero
+ * operand always yielded 128, even for narrower types). */
+static inline int zig_ctz_u128(uint128_t value, uint8_t zig_type_bit_width) {
+    const uint128_t masked = value & zig_bit_mask(uint128_t, zig_type_bit_width);
+    if (masked == 0) return zig_type_bit_width;
+    const uint64_t hi = (uint64_t)(masked >> 64);
+    const uint64_t lo = (uint64_t)masked;
+    /* masked != 0 here, so hi is guaranteed nonzero whenever lo is zero. */
+    return (lo != 0 ? __builtin_ctzll(lo) : 64 + __builtin_ctzll(hi));
+}
+
+#define zig_ctz_i128 zig_ctz_u128
+
+static inline int zig_popcount(unsigned int value, uint8_t zig_type_bit_width) {
+    return __builtin_popcount(value & zig_bit_mask(unsigned int, zig_type_bit_width));
+}
+
+static inline int zig_popcountl(unsigned long value, uint8_t zig_type_bit_width) {
+    return __builtin_popcountl(value & zig_bit_mask(unsigned long, zig_type_bit_width));
+}
+
+/* Count the set bits among the low `zig_type_bit_width` bits of `value`.
+ * The operand and mask are `unsigned long long` (matching
+ * __builtin_popcountll) so that 64-bit operands are not truncated on targets
+ * where `unsigned long` is only 32 bits wide. */
+static inline int zig_popcountll(unsigned long long value, uint8_t zig_type_bit_width) {
+    return __builtin_popcountll(value & zig_bit_mask(unsigned long long, zig_type_bit_width));
+}
+
+#define zig_popcount_u8  zig_popcount
+#define zig_popcount_i8  zig_popcount
+#define zig_popcount_u16 zig_popcount
+#define zig_popcount_i16 zig_popcount
+#define zig_popcount_u32 zig_popcountl
+#define zig_popcount_i32 zig_popcountl
+#define zig_popcount_u64 zig_popcountll
+#define zig_popcount_i64 zig_popcountll
+
+static inline int zig_popcount_u128(uint128_t value, uint8_t zig_type_bit_width) {
+    const uint128_t mask = zig_bit_mask(uint128_t, zig_type_bit_width);
+    const uint64_t hi = (value & mask) >> 64;
+    const uint64_t lo = (value & mask);
+    return __builtin_popcountll(hi) + __builtin_popcountll(lo);
+}
+
+#define zig_popcount_i128 zig_popcount_u128
+
+#define zig_sign_extend(T) \
+    static inline T zig_sign_extend_##T(T value, uint8_t zig_type_bit_width) { \
+        const T m = (T)1 << (T)(zig_type_bit_width - 1); \
+        return (value ^ m) - m; \
    }
-    if (zig_type_bit_width <= 64) {
-        const uint64_t swapped = __builtin_bswap64(value) >> (64 - zig_type_bit_width);
-        return (signed_type ? zig_sign_extend(swapped, zig_type_bit_width) : swapped);
+
+zig_sign_extend(uint8_t)
+zig_sign_extend(uint16_t)
+zig_sign_extend(uint32_t)
+zig_sign_extend(uint64_t)
+zig_sign_extend(uint128_t)
+
+#define zig_byte_swap_u(ZigTypeBits, CTypeBits) \
+    static inline uint##CTypeBits##_t zig_byte_swap_u##ZigTypeBits(uint##CTypeBits##_t value, uint8_t zig_type_bit_width) { \
+        return __builtin_bswap##CTypeBits(value) >> (CTypeBits - zig_type_bit_width); \
    }
+
+#define zig_byte_swap_s(ZigTypeBits, CTypeBits) \
+    static inline int##CTypeBits##_t zig_byte_swap_i##ZigTypeBits(int##CTypeBits##_t value, uint8_t zig_type_bit_width) { \
+        const uint##CTypeBits##_t swapped = zig_byte_swap_u##ZigTypeBits(value, zig_type_bit_width); \
+        return zig_sign_extend_uint##CTypeBits##_t(swapped, zig_type_bit_width); \
+    }
+
+#define zig_byte_swap(ZigTypeBits, CTypeBits) \
+    zig_byte_swap_u(ZigTypeBits, CTypeBits) \
+    zig_byte_swap_s(ZigTypeBits, CTypeBits)
+
+zig_byte_swap( 8, 16)
+zig_byte_swap(16, 16)
+zig_byte_swap(32, 32)
+zig_byte_swap(64, 64)
+
+static inline uint128_t zig_byte_swap_u128(uint128_t value, uint8_t zig_type_bit_width) {
    const uint128_t mask = zig_bit_mask(uint128_t, zig_type_bit_width);
    const uint128_t hi = __builtin_bswap64((uint64_t)(value >> 64));
    const uint128_t lo = __builtin_bswap64((uint64_t)value);
-    const uint128_t swapped = (((lo << 64 | hi) >> (128 - zig_type_bit_width))) & mask;
-    return (signed_type ? zig_sign_extend(swapped, zig_type_bit_width) : swapped);
+    return (((lo << 64 | hi) >> (128 - zig_type_bit_width))) & mask;
 }

-static inline uint128_t zig_bit_reverse(uint128_t value, uint8_t zig_type_bit_width, bool signed_type) {
-    static const uint128_t reverse_lut[256] = {
-        0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0,
-        0x30, 0xb0, 0x70, 0xf0, 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
-        0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, 0x04, 0x84, 0x44, 0xc4,
-        0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
-        0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc,
-        0x3c, 0xbc, 0x7c, 0xfc, 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
-        0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, 0x0a, 0x8a, 0x4a, 0xca,
-        0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
-        0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6,
-        0x36, 0xb6, 0x76, 0xf6, 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
-        0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, 0x01, 0x81, 0x41, 0xc1,
-        0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
-        0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9,
-        0x39, 0xb9, 0x79, 0xf9, 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
-        0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, 0x0d, 0x8d, 0x4d, 0xcd,
-        0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
-        0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3,
-        0x33, 0xb3, 0x73, 0xf3, 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
-        0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, 0x07, 0x87, 0x47, 0xc7,
-        0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
-        0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf,
-        0x3f, 0xbf, 0x7f, 0xff
-    };
-    const uint128_t mask = zig_bit_mask(uint128_t, zig_type_bit_width);
-    const uint128_t swapped = zig_byte_swap(value, zig_type_bit_width, signed_type);
-    const uint128_t reversed = (
-        (reverse_lut[(swapped >> 0x78) & 0xff] << 0x78) |
-        (reverse_lut[(swapped >> 0x70) & 0xff] << 0x70) |
-        (reverse_lut[(swapped >> 0x68) & 0xff] << 0x68) |
-        (reverse_lut[(swapped >> 0x60) & 0xff] << 0x60) |
-        (reverse_lut[(swapped >> 0x58) & 0xff] << 0x58) |
-        (reverse_lut[(swapped >> 0x50) & 0xff] << 0x50) |
-        (reverse_lut[(swapped >> 0x48) & 0xff] << 0x48) |
-        (reverse_lut[(swapped >> 0x40) & 0xff] << 0x40) |
-        (reverse_lut[(swapped >> 0x38) & 0xff] << 0x38) |
-        (reverse_lut[(swapped >> 0x30) & 0xff] << 0x30) |
-        (reverse_lut[(swapped >> 0x28) & 0xff] << 0x28) |
-        (reverse_lut[(swapped >> 0x20) & 0xff] << 0x20) |
-        (reverse_lut[(swapped >> 0x18) & 0xff] << 0x18) |
-        (reverse_lut[(swapped >> 0x10) & 0xff] << 0x10) |
-        (reverse_lut[(swapped >> 0x08) & 0xff] << 0x08) |
-        (reverse_lut[(swapped >> 0x00) & 0xff] << 0x00));
-    const uint128_t result =
-        (reversed >> (zig_type_bit_width < 8 ? 8 - zig_type_bit_width : 0)) & mask;
-    return (signed_type ? zig_sign_extend(result, zig_type_bit_width) : result);
+zig_byte_swap_s(128, 128)
+
+static const uint8_t zig_bit_reverse_lut[256] = {
+    0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0,
+    0x30, 0xb0, 0x70, 0xf0, 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+    0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, 0x04, 0x84, 0x44, 0xc4,
+    0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+    0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc,
+    0x3c, 0xbc, 0x7c, 0xfc, 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+    0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, 0x0a, 0x8a, 0x4a, 0xca,
+    0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+    0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6,
+    0x36, 0xb6, 0x76, 0xf6, 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+    0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, 0x01, 0x81, 0x41, 0xc1,
+    0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+    0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9,
+    0x39, 0xb9, 0x79, 0xf9, 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+    0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, 0x0d, 0x8d, 0x4d, 0xcd,
+    0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+    0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3,
+    0x33, 0xb3, 0x73, 0xf3, 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+    0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, 0x07, 0x87, 0x47, 0xc7,
+    0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+    0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf,
+    0x3f, 0xbf, 0x7f, 0xff
+};
+
+static inline uint8_t zig_bit_reverse_u8(uint8_t value, uint8_t zig_type_bit_width) {
+    const uint8_t reversed = zig_bit_reverse_lut[value] >> (8 - zig_type_bit_width);
+    return zig_sign_extend_uint8_t(reversed, zig_type_bit_width);
 }
+
+#define zig_bit_reverse_i8 zig_bit_reverse_u8
+
+static inline uint16_t zig_bit_reverse_u16(uint16_t value, uint8_t zig_type_bit_width) {
+    const uint16_t swapped = zig_byte_swap_u16(value, zig_type_bit_width);
+    const uint16_t reversed = (
+        ((uint16_t)zig_bit_reverse_lut[(swapped >> 0x08) & 0xff] << 0x08) |
+        ((uint16_t)zig_bit_reverse_lut[(swapped >> 0x00) & 0xff] << 0x00));
+    return zig_sign_extend_uint16_t(
+        reversed & zig_bit_mask(uint16_t, zig_type_bit_width),
+        zig_type_bit_width);
+}
+
+#define zig_bit_reverse_i16 zig_bit_reverse_u16
+
+static inline uint32_t zig_bit_reverse_u32(uint32_t value, uint8_t zig_type_bit_width) {
+    const uint32_t swapped = zig_byte_swap_u32(value, zig_type_bit_width);
+    const uint32_t reversed = (
+         ((uint32_t)zig_bit_reverse_lut[(swapped >> 0x18) & 0xff] << 0x18) |
+         ((uint32_t)zig_bit_reverse_lut[(swapped >> 0x10) & 0xff] << 0x10) |
+         ((uint32_t)zig_bit_reverse_lut[(swapped >> 0x08) & 0xff] << 0x08) |
+         ((uint32_t)zig_bit_reverse_lut[(swapped >> 0x00) & 0xff] << 0x00));
+    return zig_sign_extend_uint32_t(
+        reversed & zig_bit_mask(uint32_t, zig_type_bit_width),
+        zig_type_bit_width);
+}
+
+#define zig_bit_reverse_i32 zig_bit_reverse_u32
+
+static inline uint64_t zig_bit_reverse_u64(uint64_t value, uint8_t zig_type_bit_width) {
+    const uint64_t swapped = zig_byte_swap_u64(value, zig_type_bit_width);
+    const uint64_t reversed = (
+        ((uint64_t)zig_bit_reverse_lut[(swapped >> 0x38) & 0xff] << 0x38) |
+        ((uint64_t)zig_bit_reverse_lut[(swapped >> 0x30) & 0xff] << 0x30) |
+        ((uint64_t)zig_bit_reverse_lut[(swapped >> 0x28) & 0xff] << 0x28) |
+        ((uint64_t)zig_bit_reverse_lut[(swapped >> 0x20) & 0xff] << 0x20) |
+        ((uint64_t)zig_bit_reverse_lut[(swapped >> 0x18) & 0xff] << 0x18) |
+        ((uint64_t)zig_bit_reverse_lut[(swapped >> 0x10) & 0xff] << 0x10) |
+        ((uint64_t)zig_bit_reverse_lut[(swapped >> 0x08) & 0xff] << 0x08) |
+        ((uint64_t)zig_bit_reverse_lut[(swapped >> 0x00) & 0xff] << 0x00));
+    return zig_sign_extend_uint64_t(
+        reversed & zig_bit_mask(uint64_t, zig_type_bit_width),
+        zig_type_bit_width);
+}
+
+#define zig_bit_reverse_i64 zig_bit_reverse_u64
+
+static inline uint128_t zig_bit_reverse_u128(uint128_t value, uint8_t zig_type_bit_width) {
+    const uint128_t swapped = zig_byte_swap_u128(value, zig_type_bit_width);
+    const uint128_t reversed = (
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x78) & 0xff] << 0x78) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x70) & 0xff] << 0x70) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x68) & 0xff] << 0x68) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x60) & 0xff] << 0x60) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x58) & 0xff] << 0x58) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x50) & 0xff] << 0x50) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x48) & 0xff] << 0x48) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x40) & 0xff] << 0x40) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x38) & 0xff] << 0x38) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x30) & 0xff] << 0x30) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x28) & 0xff] << 0x28) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x20) & 0xff] << 0x20) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x18) & 0xff] << 0x18) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x10) & 0xff] << 0x10) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x08) & 0xff] << 0x08) |
+        ((uint128_t)zig_bit_reverse_lut[(swapped >> 0x00) & 0xff] << 0x00));
+    return zig_sign_extend_uint128_t(
+        reversed & zig_bit_mask(uint128_t, zig_type_bit_width),
+        zig_type_bit_width);
+}
+
+#define zig_bit_reverse_i128 zig_bit_reverse_u128
--- a/test/behavior/math.zig
+++ b/test/behavior/math.zig
@ -82,7 +82,6 @@ test "@clz big ints" {
    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO

    try testClzBigInts();
    comptime try testClzBigInts();
--- a/test/behavior/popcount.zig
+++ b/test/behavior/popcount.zig
@ -37,6 +37,10 @@ fn testPopCountIntegers() !void {
        var x: i8 = -120;
        try expect(@popCount(i8, x) == 2);
    }
+    {
+        var x: u128 = 0b11111111000110001100010000100001000011000011100101010001;
+        try expect(@popCount(u128, x) == 24);
+    }
    comptime {
        try expect(@popCount(u8, @bitCast(u8, @as(i8, -120))) == 2);
    }