x86_64: implement optimized float @reduce(.Mul)

2025-12-06 06:13:07 +00:00 · 2025-05-24 11:37:13 -04:00 · 2025-05-24 11:37:13 -04:00 · a4a1ebdeed
commit a4a1ebdeed
parent 612f5784cf
14 changed files with 1854 additions and 121 deletions
--- a/lib/std/zig/Zir.zig
+++ b/lib/std/zig/Zir.zig
@ -2142,7 +2142,7 @@ pub const Inst = struct {
        ref_start_index = static_len,
        _,
-        pub const static_len = 105;
+        pub const static_len = 109;
        pub fn toRef(i: Index) Inst.Ref {
            return @enumFromInt(@intFromEnum(Index.ref_start_index) + @intFromEnum(i));
@ -2255,11 +2255,15 @@ pub const Inst = struct {
        vector_1_u256_type,
        vector_4_f16_type,
        vector_8_f16_type,
        vector_16_f16_type,
        vector_32_f16_type,
        vector_2_f32_type,
        vector_4_f32_type,
        vector_8_f32_type,
        vector_16_f32_type,
        vector_2_f64_type,
        vector_4_f64_type,
        vector_8_f64_type,
        optional_noreturn_type,
        anyerror_void_error_union_type,
        adhoc_inferred_error_set_type,
--- a/src/Air.zig
+++ b/src/Air.zig
@ -1038,11 +1038,15 @@ pub const Inst = struct {
        vector_1_u256_type = @intFromEnum(InternPool.Index.vector_1_u256_type),
        vector_4_f16_type = @intFromEnum(InternPool.Index.vector_4_f16_type),
        vector_8_f16_type = @intFromEnum(InternPool.Index.vector_8_f16_type),
        vector_16_f16_type = @intFromEnum(InternPool.Index.vector_16_f16_type),
        vector_32_f16_type = @intFromEnum(InternPool.Index.vector_32_f16_type),
        vector_2_f32_type = @intFromEnum(InternPool.Index.vector_2_f32_type),
        vector_4_f32_type = @intFromEnum(InternPool.Index.vector_4_f32_type),
        vector_8_f32_type = @intFromEnum(InternPool.Index.vector_8_f32_type),
        vector_16_f32_type = @intFromEnum(InternPool.Index.vector_16_f32_type),
        vector_2_f64_type = @intFromEnum(InternPool.Index.vector_2_f64_type),
        vector_4_f64_type = @intFromEnum(InternPool.Index.vector_4_f64_type),
        vector_8_f64_type = @intFromEnum(InternPool.Index.vector_8_f64_type),
        optional_noreturn_type = @intFromEnum(InternPool.Index.optional_noreturn_type),
        anyerror_void_error_union_type = @intFromEnum(InternPool.Index.anyerror_void_error_union_type),
        adhoc_inferred_error_set_type = @intFromEnum(InternPool.Index.adhoc_inferred_error_set_type),
--- a/src/InternPool.zig
+++ b/src/InternPool.zig
@ -4615,11 +4615,15 @@ pub const Index = enum(u32) {
    vector_1_u256_type,
    vector_4_f16_type,
    vector_8_f16_type,
    vector_16_f16_type,
    vector_32_f16_type,
    vector_2_f32_type,
    vector_4_f32_type,
    vector_8_f32_type,
    vector_16_f32_type,
    vector_2_f64_type,
    vector_4_f64_type,
    vector_8_f64_type,
    optional_noreturn_type,
    anyerror_void_error_union_type,
@ -5174,16 +5178,24 @@ pub const static_keys = [_]Key{
    .{ .vector_type = .{ .len = 4, .child = .f16_type } },
    // @Vector(8, f16)
    .{ .vector_type = .{ .len = 8, .child = .f16_type } },
    // @Vector(16, f16)
    .{ .vector_type = .{ .len = 16, .child = .f16_type } },
    // @Vector(32, f16)
    .{ .vector_type = .{ .len = 32, .child = .f16_type } },
    // @Vector(2, f32)
    .{ .vector_type = .{ .len = 2, .child = .f32_type } },
    // @Vector(4, f32)
    .{ .vector_type = .{ .len = 4, .child = .f32_type } },
    // @Vector(8, f32)
    .{ .vector_type = .{ .len = 8, .child = .f32_type } },
    // @Vector(16, f32)
    .{ .vector_type = .{ .len = 16, .child = .f32_type } },
    // @Vector(2, f64)
    .{ .vector_type = .{ .len = 2, .child = .f64_type } },
    // @Vector(4, f64)
    .{ .vector_type = .{ .len = 4, .child = .f64_type } },
    // @Vector(8, f64)
    .{ .vector_type = .{ .len = 8, .child = .f64_type } },
    // ?noreturn
    .{ .opt_type = .noreturn_type },
@ -11847,11 +11859,15 @@ pub fn typeOf(ip: *const InternPool, index: Index) Index {
        .vector_1_u256_type,
        .vector_4_f16_type,
        .vector_8_f16_type,
        .vector_16_f16_type,
        .vector_32_f16_type,
        .vector_2_f32_type,
        .vector_4_f32_type,
        .vector_8_f32_type,
        .vector_16_f32_type,
        .vector_2_f64_type,
        .vector_4_f64_type,
        .vector_8_f64_type,
        .optional_noreturn_type,
        .anyerror_void_error_union_type,
        .adhoc_inferred_error_set_type,
@ -12175,11 +12191,15 @@ pub fn zigTypeTag(ip: *const InternPool, index: Index) std.builtin.TypeId {
        .vector_1_u256_type,
        .vector_4_f16_type,
        .vector_8_f16_type,
        .vector_16_f16_type,
        .vector_32_f16_type,
        .vector_2_f32_type,
        .vector_4_f32_type,
        .vector_8_f32_type,
        .vector_16_f32_type,
        .vector_2_f64_type,
        .vector_4_f64_type,
        .vector_8_f64_type,
        => .vector,
        .optional_noreturn_type => .optional,
--- a/src/Sema.zig
+++ b/src/Sema.zig
@ -36571,11 +36571,15 @@ pub fn typeHasOnePossibleValue(sema: *Sema, ty: Type) CompileError!?Value {
        .vector_1_u256_type,
        .vector_4_f16_type,
        .vector_8_f16_type,
        .vector_16_f16_type,
        .vector_32_f16_type,
        .vector_2_f32_type,
        .vector_4_f32_type,
        .vector_8_f32_type,
        .vector_16_f32_type,
        .vector_2_f64_type,
        .vector_4_f64_type,
        .vector_8_f64_type,
        .anyerror_void_error_union_type,
        => null,
        .void_type => Value.void,
--- a/src/Type.zig
+++ b/src/Type.zig
@ -4136,11 +4136,15 @@ pub const vector_2_u128: Type = .{ .ip_index = .vector_2_u128_type };
 pub const vector_1_u256: Type = .{ .ip_index = .vector_1_u256_type };
 pub const vector_4_f16: Type = .{ .ip_index = .vector_4_f16_type };
 pub const vector_8_f16: Type = .{ .ip_index = .vector_8_f16_type };
 pub const vector_16_f16: Type = .{ .ip_index = .vector_16_f16_type };
 pub const vector_32_f16: Type = .{ .ip_index = .vector_32_f16_type };
 pub const vector_2_f32: Type = .{ .ip_index = .vector_2_f32_type };
 pub const vector_4_f32: Type = .{ .ip_index = .vector_4_f32_type };
 pub const vector_8_f32: Type = .{ .ip_index = .vector_8_f32_type };
 pub const vector_16_f32: Type = .{ .ip_index = .vector_16_f32_type };
 pub const vector_2_f64: Type = .{ .ip_index = .vector_2_f64_type };
 pub const vector_4_f64: Type = .{ .ip_index = .vector_4_f64_type };
 pub const vector_8_f64: Type = .{ .ip_index = .vector_8_f64_type };
 pub const empty_tuple: Type = .{ .ip_index = .empty_tuple_type };
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
--- a/src/codegen/c/Type.zig
+++ b/src/codegen/c/Type.zig
@ -1885,6 +1885,36 @@ pub const Pool = struct {
                };
                return pool.fromFields(allocator, .@"struct", &fields, kind);
            },
            .vector_16_f16_type => {
                const vector_ctype = try pool.getVector(allocator, .{
                    .elem_ctype = .f16,
                    .len = 16,
                });
                if (!kind.isParameter()) return vector_ctype;
                var fields = [_]Info.Field{
                    .{
                        .name = .{ .index = .array },
                        .ctype = vector_ctype,
                        .alignas = AlignAs.fromAbiAlignment(Type.f16.abiAlignment(zcu)),
                    },
                };
                return pool.fromFields(allocator, .@"struct", &fields, kind);
            },
            .vector_32_f16_type => {
                const vector_ctype = try pool.getVector(allocator, .{
                    .elem_ctype = .f16,
                    .len = 32,
                });
                if (!kind.isParameter()) return vector_ctype;
                var fields = [_]Info.Field{
                    .{
                        .name = .{ .index = .array },
                        .ctype = vector_ctype,
                        .alignas = AlignAs.fromAbiAlignment(Type.f16.abiAlignment(zcu)),
                    },
                };
                return pool.fromFields(allocator, .@"struct", &fields, kind);
            },
            .vector_2_f32_type => {
                const vector_ctype = try pool.getVector(allocator, .{
                    .elem_ctype = .f32,
@ -1930,6 +1960,21 @@ pub const Pool = struct {
                };
                return pool.fromFields(allocator, .@"struct", &fields, kind);
            },
            .vector_16_f32_type => {
                const vector_ctype = try pool.getVector(allocator, .{
                    .elem_ctype = .f32,
                    .len = 16,
                });
                if (!kind.isParameter()) return vector_ctype;
                var fields = [_]Info.Field{
                    .{
                        .name = .{ .index = .array },
                        .ctype = vector_ctype,
                        .alignas = AlignAs.fromAbiAlignment(Type.f32.abiAlignment(zcu)),
                    },
                };
                return pool.fromFields(allocator, .@"struct", &fields, kind);
            },
            .vector_2_f64_type => {
                const vector_ctype = try pool.getVector(allocator, .{
                    .elem_ctype = .f64,
@ -1960,6 +2005,21 @@ pub const Pool = struct {
                };
                return pool.fromFields(allocator, .@"struct", &fields, kind);
            },
            .vector_8_f64_type => {
                const vector_ctype = try pool.getVector(allocator, .{
                    .elem_ctype = .f64,
                    .len = 8,
                });
                if (!kind.isParameter()) return vector_ctype;
                var fields = [_]Info.Field{
                    .{
                        .name = .{ .index = .array },
                        .ctype = vector_ctype,
                        .alignas = AlignAs.fromAbiAlignment(Type.f64.abiAlignment(zcu)),
                    },
                };
                return pool.fromFields(allocator, .@"struct", &fields, kind);
            },
            .undef,
            .zero,
--- a/test/behavior/x86_64/math.zig
+++ b/test/behavior/x86_64/math.zig
@ -125,7 +125,7 @@ fn boolOr(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
    @compileError("unsupported boolOr type: " ++ @typeName(@TypeOf(lhs)));
 }
-pub const Compare = enum { strict, relaxed, approx, approx_int };
+pub const Compare = enum { strict, relaxed, approx, approx_int, approx_or_overflow };
 // noinline for a more helpful stack trace
 pub noinline fn checkExpected(expected: anytype, actual: @TypeOf(expected), comptime compare: Compare) !void {
    const Expected = @TypeOf(expected);
@ -137,20 +137,32 @@ pub noinline fn checkExpected(expected: anytype, actual: @TypeOf(expected), comp
                break :unexpected switch (compare) {
                    .strict => boolOr(unequal, sign(expected) != sign(actual)),
                    .relaxed => unequal,
-                    .approx, .approx_int => comptime unreachable,
+                    .approx, .approx_int, .approx_or_overflow => comptime unreachable,
                };
            },
-            .approx, .approx_int => {
+            .approx, .approx_int, .approx_or_overflow => {
                const epsilon = math.floatEps(Scalar(Expected));
-                const tolerance = @sqrt(epsilon);
+                const tolerance = switch (compare) {
-                break :unexpected @abs(expected - actual) > @max(
+                    .strict, .relaxed => comptime unreachable,
                    .approx, .approx_int => @sqrt(epsilon),
                    .approx_or_overflow => @exp2(@log2(epsilon) * 0.4),
                };
                const approx_unequal = @abs(expected - actual) > @max(
                    @abs(expected) * splat(Expected, tolerance),
                    splat(Expected, switch (compare) {
                        .strict, .relaxed => comptime unreachable,
-                        .approx => tolerance,
+                        .approx, .approx_or_overflow => tolerance,
                        .approx_int => 1,
                    }),
                );
                break :unexpected switch (compare) {
                    .strict, .relaxed => comptime unreachable,
                    .approx, .approx_int => approx_unequal,
                    .approx_or_overflow => boolAnd(approx_unequal, boolOr(boolAnd(
                        @abs(expected) != splat(Expected, inf(Expected)),
                        @abs(actual) != splat(Expected, inf(Expected)),
                    ), sign(expected) != sign(actual))),
                };
            },
        },
        .@"struct" => |@"struct"| inline for (@"struct".fields) |field| {
--- a/test/behavior/x86_64/unary.zig
+++ b/test/behavior/x86_64/unary.zig
@ -5119,6 +5119,15 @@ test reduceAddOptimized {
    try test_reduce_add_optimized.testFloatVectors();
 }
 /// Returns the product of every element of `rhs`, with the float mode of
 /// this scope set to `.optimized`.
 inline fn reduceMulOptimized(comptime Type: type, rhs: Type) @typeInfo(Type).vector.child {
    @setFloatMode(.optimized);
    const product = @reduce(.Mul, rhs);
    return product;
 }
 // Drives `reduceMulOptimized` through the `unary` harness's
 // `testFloatVectors` entry point, comparing with `.approx_or_overflow`.
 test reduceMulOptimized {
    try unary(reduceMulOptimized, .{ .compare = .approx_or_overflow }).testFloatVectors();
 }
 /// Builds a `Type` value in which every element equals the first element
 /// of `rhs`.
 inline fn splat(comptime Type: type, rhs: Type) Type {
    const first = rhs[0];
    return @splat(first);
 }
--- a/test/cases/compile_errors/@import_zon_bad_type.zig
+++ b/test/cases/compile_errors/@import_zon_bad_type.zig
@ -117,9 +117,9 @@ export fn testMutablePointer() void {
 // tmp.zig:37:38: note: imported here
 // neg_inf.zon:1:1: error: expected type '?u8'
 // tmp.zig:57:28: note: imported here
-// neg_inf.zon:1:1: error: expected type 'tmp.testNonExhaustiveEnum__enum_505'
+// neg_inf.zon:1:1: error: expected type 'tmp.testNonExhaustiveEnum__enum_509'
 // tmp.zig:62:39: note: imported here
-// neg_inf.zon:1:1: error: expected type 'tmp.testUntaggedUnion__union_507'
+// neg_inf.zon:1:1: error: expected type 'tmp.testUntaggedUnion__union_511'
 // tmp.zig:67:44: note: imported here
-// neg_inf.zon:1:1: error: expected type 'tmp.testTaggedUnionVoid__union_510'
+// neg_inf.zon:1:1: error: expected type 'tmp.testTaggedUnionVoid__union_514'
 // tmp.zig:72:50: note: imported here
--- a/test/cases/compile_errors/anytype_param_requires_comptime.zig
+++ b/test/cases/compile_errors/anytype_param_requires_comptime.zig
@ -15,6 +15,6 @@ pub export fn entry() void {
 // error
 //
 // :7:25: error: unable to resolve comptime value
-// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_479.C' must be comptime-known
+// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_483.C' must be comptime-known
 // :4:16: note: struct requires comptime because of this field
 // :4:16: note: types are not available at runtime
--- a/test/cases/compile_errors/bogus_method_call_on_slice.zig
+++ b/test/cases/compile_errors/bogus_method_call_on_slice.zig
@ -16,5 +16,5 @@ pub export fn entry2() void {
 //
 // :3:6: error: no field or member function named 'copy' in '[]const u8'
 // :9:8: error: no field or member function named 'bar' in '@TypeOf(.{})'
-// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_483'
+// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_487'
 // :12:6: note: struct declared here
--- a/test/cases/compile_errors/coerce_anon_struct.zig
+++ b/test/cases/compile_errors/coerce_anon_struct.zig
@ -6,6 +6,6 @@ export fn foo() void {
 // error
 //
-// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_472'
+// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_476'
 // :3:16: note: struct declared here
 // :1:11: note: struct declared here
--- a/test/cases/compile_errors/redundant_try.zig
+++ b/test/cases/compile_errors/redundant_try.zig
@ -44,9 +44,9 @@ comptime {
 //
 // :5:23: error: expected error union type, found 'comptime_int'
 // :10:23: error: expected error union type, found '@TypeOf(.{})'
-// :15:23: error: expected error union type, found 'tmp.test2__struct_509'
+// :15:23: error: expected error union type, found 'tmp.test2__struct_513'
 // :15:23: note: struct declared here
-// :20:27: error: expected error union type, found 'tmp.test3__struct_511'
+// :20:27: error: expected error union type, found 'tmp.test3__struct_515'
 // :20:27: note: struct declared here
 // :25:23: error: expected error union type, found 'struct { comptime *const [5:0]u8 = "hello" }'
 // :31:13: error: expected error union type, found 'u32'