x86_64: implement optimized float @reduce(.Mul)

This commit is contained in:
Jacob Young 2025-05-24 11:37:13 -04:00
parent 612f5784cf
commit a4a1ebdeed
14 changed files with 1854 additions and 121 deletions

View File

@ -2142,7 +2142,7 @@ pub const Inst = struct {
ref_start_index = static_len, ref_start_index = static_len,
_, _,
pub const static_len = 105; pub const static_len = 109;
pub fn toRef(i: Index) Inst.Ref { pub fn toRef(i: Index) Inst.Ref {
return @enumFromInt(@intFromEnum(Index.ref_start_index) + @intFromEnum(i)); return @enumFromInt(@intFromEnum(Index.ref_start_index) + @intFromEnum(i));
@ -2255,11 +2255,15 @@ pub const Inst = struct {
vector_1_u256_type, vector_1_u256_type,
vector_4_f16_type, vector_4_f16_type,
vector_8_f16_type, vector_8_f16_type,
vector_16_f16_type,
vector_32_f16_type,
vector_2_f32_type, vector_2_f32_type,
vector_4_f32_type, vector_4_f32_type,
vector_8_f32_type, vector_8_f32_type,
vector_16_f32_type,
vector_2_f64_type, vector_2_f64_type,
vector_4_f64_type, vector_4_f64_type,
vector_8_f64_type,
optional_noreturn_type, optional_noreturn_type,
anyerror_void_error_union_type, anyerror_void_error_union_type,
adhoc_inferred_error_set_type, adhoc_inferred_error_set_type,

View File

@ -1038,11 +1038,15 @@ pub const Inst = struct {
vector_1_u256_type = @intFromEnum(InternPool.Index.vector_1_u256_type), vector_1_u256_type = @intFromEnum(InternPool.Index.vector_1_u256_type),
vector_4_f16_type = @intFromEnum(InternPool.Index.vector_4_f16_type), vector_4_f16_type = @intFromEnum(InternPool.Index.vector_4_f16_type),
vector_8_f16_type = @intFromEnum(InternPool.Index.vector_8_f16_type), vector_8_f16_type = @intFromEnum(InternPool.Index.vector_8_f16_type),
vector_16_f16_type = @intFromEnum(InternPool.Index.vector_16_f16_type),
vector_32_f16_type = @intFromEnum(InternPool.Index.vector_32_f16_type),
vector_2_f32_type = @intFromEnum(InternPool.Index.vector_2_f32_type), vector_2_f32_type = @intFromEnum(InternPool.Index.vector_2_f32_type),
vector_4_f32_type = @intFromEnum(InternPool.Index.vector_4_f32_type), vector_4_f32_type = @intFromEnum(InternPool.Index.vector_4_f32_type),
vector_8_f32_type = @intFromEnum(InternPool.Index.vector_8_f32_type), vector_8_f32_type = @intFromEnum(InternPool.Index.vector_8_f32_type),
vector_16_f32_type = @intFromEnum(InternPool.Index.vector_16_f32_type),
vector_2_f64_type = @intFromEnum(InternPool.Index.vector_2_f64_type), vector_2_f64_type = @intFromEnum(InternPool.Index.vector_2_f64_type),
vector_4_f64_type = @intFromEnum(InternPool.Index.vector_4_f64_type), vector_4_f64_type = @intFromEnum(InternPool.Index.vector_4_f64_type),
vector_8_f64_type = @intFromEnum(InternPool.Index.vector_8_f64_type),
optional_noreturn_type = @intFromEnum(InternPool.Index.optional_noreturn_type), optional_noreturn_type = @intFromEnum(InternPool.Index.optional_noreturn_type),
anyerror_void_error_union_type = @intFromEnum(InternPool.Index.anyerror_void_error_union_type), anyerror_void_error_union_type = @intFromEnum(InternPool.Index.anyerror_void_error_union_type),
adhoc_inferred_error_set_type = @intFromEnum(InternPool.Index.adhoc_inferred_error_set_type), adhoc_inferred_error_set_type = @intFromEnum(InternPool.Index.adhoc_inferred_error_set_type),

View File

@ -4615,11 +4615,15 @@ pub const Index = enum(u32) {
vector_1_u256_type, vector_1_u256_type,
vector_4_f16_type, vector_4_f16_type,
vector_8_f16_type, vector_8_f16_type,
vector_16_f16_type,
vector_32_f16_type,
vector_2_f32_type, vector_2_f32_type,
vector_4_f32_type, vector_4_f32_type,
vector_8_f32_type, vector_8_f32_type,
vector_16_f32_type,
vector_2_f64_type, vector_2_f64_type,
vector_4_f64_type, vector_4_f64_type,
vector_8_f64_type,
optional_noreturn_type, optional_noreturn_type,
anyerror_void_error_union_type, anyerror_void_error_union_type,
@ -5174,16 +5178,24 @@ pub const static_keys = [_]Key{
.{ .vector_type = .{ .len = 4, .child = .f16_type } }, .{ .vector_type = .{ .len = 4, .child = .f16_type } },
// @Vector(8, f16) // @Vector(8, f16)
.{ .vector_type = .{ .len = 8, .child = .f16_type } }, .{ .vector_type = .{ .len = 8, .child = .f16_type } },
// @Vector(16, f16)
.{ .vector_type = .{ .len = 16, .child = .f16_type } },
// @Vector(32, f16)
.{ .vector_type = .{ .len = 32, .child = .f16_type } },
// @Vector(2, f32) // @Vector(2, f32)
.{ .vector_type = .{ .len = 2, .child = .f32_type } }, .{ .vector_type = .{ .len = 2, .child = .f32_type } },
// @Vector(4, f32) // @Vector(4, f32)
.{ .vector_type = .{ .len = 4, .child = .f32_type } }, .{ .vector_type = .{ .len = 4, .child = .f32_type } },
// @Vector(8, f32) // @Vector(8, f32)
.{ .vector_type = .{ .len = 8, .child = .f32_type } }, .{ .vector_type = .{ .len = 8, .child = .f32_type } },
// @Vector(16, f32)
.{ .vector_type = .{ .len = 16, .child = .f32_type } },
// @Vector(2, f64) // @Vector(2, f64)
.{ .vector_type = .{ .len = 2, .child = .f64_type } }, .{ .vector_type = .{ .len = 2, .child = .f64_type } },
// @Vector(4, f64) // @Vector(4, f64)
.{ .vector_type = .{ .len = 4, .child = .f64_type } }, .{ .vector_type = .{ .len = 4, .child = .f64_type } },
// @Vector(8, f64)
.{ .vector_type = .{ .len = 8, .child = .f64_type } },
// ?noreturn // ?noreturn
.{ .opt_type = .noreturn_type }, .{ .opt_type = .noreturn_type },
@ -11847,11 +11859,15 @@ pub fn typeOf(ip: *const InternPool, index: Index) Index {
.vector_1_u256_type, .vector_1_u256_type,
.vector_4_f16_type, .vector_4_f16_type,
.vector_8_f16_type, .vector_8_f16_type,
.vector_16_f16_type,
.vector_32_f16_type,
.vector_2_f32_type, .vector_2_f32_type,
.vector_4_f32_type, .vector_4_f32_type,
.vector_8_f32_type, .vector_8_f32_type,
.vector_16_f32_type,
.vector_2_f64_type, .vector_2_f64_type,
.vector_4_f64_type, .vector_4_f64_type,
.vector_8_f64_type,
.optional_noreturn_type, .optional_noreturn_type,
.anyerror_void_error_union_type, .anyerror_void_error_union_type,
.adhoc_inferred_error_set_type, .adhoc_inferred_error_set_type,
@ -12175,11 +12191,15 @@ pub fn zigTypeTag(ip: *const InternPool, index: Index) std.builtin.TypeId {
.vector_1_u256_type, .vector_1_u256_type,
.vector_4_f16_type, .vector_4_f16_type,
.vector_8_f16_type, .vector_8_f16_type,
.vector_16_f16_type,
.vector_32_f16_type,
.vector_2_f32_type, .vector_2_f32_type,
.vector_4_f32_type, .vector_4_f32_type,
.vector_8_f32_type, .vector_8_f32_type,
.vector_16_f32_type,
.vector_2_f64_type, .vector_2_f64_type,
.vector_4_f64_type, .vector_4_f64_type,
.vector_8_f64_type,
=> .vector, => .vector,
.optional_noreturn_type => .optional, .optional_noreturn_type => .optional,

View File

@ -36571,11 +36571,15 @@ pub fn typeHasOnePossibleValue(sema: *Sema, ty: Type) CompileError!?Value {
.vector_1_u256_type, .vector_1_u256_type,
.vector_4_f16_type, .vector_4_f16_type,
.vector_8_f16_type, .vector_8_f16_type,
.vector_16_f16_type,
.vector_32_f16_type,
.vector_2_f32_type, .vector_2_f32_type,
.vector_4_f32_type, .vector_4_f32_type,
.vector_8_f32_type, .vector_8_f32_type,
.vector_16_f32_type,
.vector_2_f64_type, .vector_2_f64_type,
.vector_4_f64_type, .vector_4_f64_type,
.vector_8_f64_type,
.anyerror_void_error_union_type, .anyerror_void_error_union_type,
=> null, => null,
.void_type => Value.void, .void_type => Value.void,

View File

@ -4136,11 +4136,15 @@ pub const vector_2_u128: Type = .{ .ip_index = .vector_2_u128_type };
pub const vector_1_u256: Type = .{ .ip_index = .vector_1_u256_type }; pub const vector_1_u256: Type = .{ .ip_index = .vector_1_u256_type };
pub const vector_4_f16: Type = .{ .ip_index = .vector_4_f16_type }; pub const vector_4_f16: Type = .{ .ip_index = .vector_4_f16_type };
pub const vector_8_f16: Type = .{ .ip_index = .vector_8_f16_type }; pub const vector_8_f16: Type = .{ .ip_index = .vector_8_f16_type };
pub const vector_16_f16: Type = .{ .ip_index = .vector_16_f16_type };
pub const vector_32_f16: Type = .{ .ip_index = .vector_32_f16_type };
pub const vector_2_f32: Type = .{ .ip_index = .vector_2_f32_type }; pub const vector_2_f32: Type = .{ .ip_index = .vector_2_f32_type };
pub const vector_4_f32: Type = .{ .ip_index = .vector_4_f32_type }; pub const vector_4_f32: Type = .{ .ip_index = .vector_4_f32_type };
pub const vector_8_f32: Type = .{ .ip_index = .vector_8_f32_type }; pub const vector_8_f32: Type = .{ .ip_index = .vector_8_f32_type };
pub const vector_16_f32: Type = .{ .ip_index = .vector_16_f32_type };
pub const vector_2_f64: Type = .{ .ip_index = .vector_2_f64_type }; pub const vector_2_f64: Type = .{ .ip_index = .vector_2_f64_type };
pub const vector_4_f64: Type = .{ .ip_index = .vector_4_f64_type }; pub const vector_4_f64: Type = .{ .ip_index = .vector_4_f64_type };
pub const vector_8_f64: Type = .{ .ip_index = .vector_8_f64_type };
pub const empty_tuple: Type = .{ .ip_index = .empty_tuple_type }; pub const empty_tuple: Type = .{ .ip_index = .empty_tuple_type };

File diff suppressed because it is too large Load Diff

View File

@ -1885,6 +1885,36 @@ pub const Pool = struct {
}; };
return pool.fromFields(allocator, .@"struct", &fields, kind); return pool.fromFields(allocator, .@"struct", &fields, kind);
}, },
// Switch arm for the statically interned `vector_16_f16_type` index:
// requests a 16-element vector ctype with `f16` elements from the pool.
.vector_16_f16_type => {
const vector_ctype = try pool.getVector(allocator, .{
.elem_ctype = .f16,
.len = 16,
});
// Any non-parameter kind uses the vector ctype directly.
if (!kind.isParameter()) return vector_ctype;
// Parameter kinds wrap the vector in a one-field struct. The field is named
// by the `.array` index and aligned to the ABI alignment of the element
// type (`f16`), not of the whole vector.
// NOTE(review): presumably the wrapper exists so the value can be passed by
// value as a C parameter — confirm against the other vector arms.
var fields = [_]Info.Field{
.{
.name = .{ .index = .array },
.ctype = vector_ctype,
.alignas = AlignAs.fromAbiAlignment(Type.f16.abiAlignment(zcu)),
},
};
return pool.fromFields(allocator, .@"struct", &fields, kind);
},
// Switch arm for the statically interned `vector_32_f16_type` index:
// requests a 32-element vector ctype with `f16` elements from the pool.
.vector_32_f16_type => {
const vector_ctype = try pool.getVector(allocator, .{
.elem_ctype = .f16,
.len = 32,
});
// Any non-parameter kind uses the vector ctype directly.
if (!kind.isParameter()) return vector_ctype;
// Parameter kinds wrap the vector in a one-field struct. The field is named
// by the `.array` index and aligned to the ABI alignment of the element
// type (`f16`), not of the whole vector.
var fields = [_]Info.Field{
.{
.name = .{ .index = .array },
.ctype = vector_ctype,
.alignas = AlignAs.fromAbiAlignment(Type.f16.abiAlignment(zcu)),
},
};
return pool.fromFields(allocator, .@"struct", &fields, kind);
},
.vector_2_f32_type => { .vector_2_f32_type => {
const vector_ctype = try pool.getVector(allocator, .{ const vector_ctype = try pool.getVector(allocator, .{
.elem_ctype = .f32, .elem_ctype = .f32,
@ -1930,6 +1960,21 @@ pub const Pool = struct {
}; };
return pool.fromFields(allocator, .@"struct", &fields, kind); return pool.fromFields(allocator, .@"struct", &fields, kind);
}, },
// Switch arm for the statically interned `vector_16_f32_type` index:
// requests a 16-element vector ctype with `f32` elements from the pool.
.vector_16_f32_type => {
const vector_ctype = try pool.getVector(allocator, .{
.elem_ctype = .f32,
.len = 16,
});
// Any non-parameter kind uses the vector ctype directly.
if (!kind.isParameter()) return vector_ctype;
// Parameter kinds wrap the vector in a one-field struct. The field is named
// by the `.array` index and aligned to the ABI alignment of the element
// type (`f32`), not of the whole vector.
var fields = [_]Info.Field{
.{
.name = .{ .index = .array },
.ctype = vector_ctype,
.alignas = AlignAs.fromAbiAlignment(Type.f32.abiAlignment(zcu)),
},
};
return pool.fromFields(allocator, .@"struct", &fields, kind);
},
.vector_2_f64_type => { .vector_2_f64_type => {
const vector_ctype = try pool.getVector(allocator, .{ const vector_ctype = try pool.getVector(allocator, .{
.elem_ctype = .f64, .elem_ctype = .f64,
@ -1960,6 +2005,21 @@ pub const Pool = struct {
}; };
return pool.fromFields(allocator, .@"struct", &fields, kind); return pool.fromFields(allocator, .@"struct", &fields, kind);
}, },
// Switch arm for the statically interned `vector_8_f64_type` index:
// requests an 8-element vector ctype with `f64` elements from the pool.
.vector_8_f64_type => {
const vector_ctype = try pool.getVector(allocator, .{
.elem_ctype = .f64,
.len = 8,
});
// Any non-parameter kind uses the vector ctype directly.
if (!kind.isParameter()) return vector_ctype;
// Parameter kinds wrap the vector in a one-field struct. The field is named
// by the `.array` index and aligned to the ABI alignment of the element
// type (`f64`), not of the whole vector.
var fields = [_]Info.Field{
.{
.name = .{ .index = .array },
.ctype = vector_ctype,
.alignas = AlignAs.fromAbiAlignment(Type.f64.abiAlignment(zcu)),
},
};
return pool.fromFields(allocator, .@"struct", &fields, kind);
},
.undef, .undef,
.zero, .zero,

View File

@ -125,7 +125,7 @@ fn boolOr(lhs: anytype, rhs: @TypeOf(lhs)) @TypeOf(lhs) {
@compileError("unsupported boolOr type: " ++ @typeName(@TypeOf(lhs))); @compileError("unsupported boolOr type: " ++ @typeName(@TypeOf(lhs)));
} }
pub const Compare = enum { strict, relaxed, approx, approx_int }; pub const Compare = enum { strict, relaxed, approx, approx_int, approx_or_overflow };
// noinline for a more helpful stack trace // noinline for a more helpful stack trace
pub noinline fn checkExpected(expected: anytype, actual: @TypeOf(expected), comptime compare: Compare) !void { pub noinline fn checkExpected(expected: anytype, actual: @TypeOf(expected), comptime compare: Compare) !void {
const Expected = @TypeOf(expected); const Expected = @TypeOf(expected);
@ -137,20 +137,32 @@ pub noinline fn checkExpected(expected: anytype, actual: @TypeOf(expected), comp
break :unexpected switch (compare) { break :unexpected switch (compare) {
.strict => boolOr(unequal, sign(expected) != sign(actual)), .strict => boolOr(unequal, sign(expected) != sign(actual)),
.relaxed => unequal, .relaxed => unequal,
.approx, .approx_int => comptime unreachable, .approx, .approx_int, .approx_or_overflow => comptime unreachable,
}; };
}, },
.approx, .approx_int => { .approx, .approx_int, .approx_or_overflow => {
const epsilon = math.floatEps(Scalar(Expected)); const epsilon = math.floatEps(Scalar(Expected));
const tolerance = @sqrt(epsilon); const tolerance = switch (compare) {
break :unexpected @abs(expected - actual) > @max( .strict, .relaxed => comptime unreachable,
.approx, .approx_int => @sqrt(epsilon),
.approx_or_overflow => @exp2(@log2(epsilon) * 0.4),
};
const approx_unequal = @abs(expected - actual) > @max(
@abs(expected) * splat(Expected, tolerance), @abs(expected) * splat(Expected, tolerance),
splat(Expected, switch (compare) { splat(Expected, switch (compare) {
.strict, .relaxed => comptime unreachable, .strict, .relaxed => comptime unreachable,
.approx => tolerance, .approx, .approx_or_overflow => tolerance,
.approx_int => 1, .approx_int => 1,
}), }),
); );
break :unexpected switch (compare) {
.strict, .relaxed => comptime unreachable,
.approx, .approx_int => approx_unequal,
.approx_or_overflow => boolAnd(approx_unequal, boolOr(boolAnd(
@abs(expected) != splat(Expected, inf(Expected)),
@abs(actual) != splat(Expected, inf(Expected)),
), sign(expected) != sign(actual))),
};
}, },
}, },
.@"struct" => |@"struct"| inline for (@"struct".fields) |field| { .@"struct" => |@"struct"| inline for (@"struct".fields) |field| {

View File

@ -5119,6 +5119,15 @@ test reduceAddOptimized {
try test_reduce_add_optimized.testFloatVectors(); try test_reduce_add_optimized.testFloatVectors();
} }
/// Returns the product of all elements of the vector `rhs`, computed with the
/// float mode of this function's scope set to `.optimized`.
/// `Type` must be a vector type; the result has that vector's element type.
inline fn reduceMulOptimized(comptime Type: type, rhs: Type) @typeInfo(Type).vector.child {
    const Elem = @typeInfo(Type).vector.child;
    // Must be set in this scope so that the reduction below is affected.
    @setFloatMode(.optimized);
    const product: Elem = @reduce(.Mul, rhs);
    return product;
}
// Runs `reduceMulOptimized` through the `unary` test helper over its float
// vector cases, comparing results with the `.approx_or_overflow` mode.
// NOTE(review): `unary` and `testFloatVectors` are defined outside this hunk;
// their exact coverage is not visible here.
test reduceMulOptimized {
const test_reduce_mul_optimized = unary(reduceMulOptimized, .{ .compare = .approx_or_overflow });
try test_reduce_mul_optimized.testFloatVectors();
}
inline fn splat(comptime Type: type, rhs: Type) Type { inline fn splat(comptime Type: type, rhs: Type) Type {
return @splat(rhs[0]); return @splat(rhs[0]);
} }

View File

@ -117,9 +117,9 @@ export fn testMutablePointer() void {
// tmp.zig:37:38: note: imported here // tmp.zig:37:38: note: imported here
// neg_inf.zon:1:1: error: expected type '?u8' // neg_inf.zon:1:1: error: expected type '?u8'
// tmp.zig:57:28: note: imported here // tmp.zig:57:28: note: imported here
// neg_inf.zon:1:1: error: expected type 'tmp.testNonExhaustiveEnum__enum_505' // neg_inf.zon:1:1: error: expected type 'tmp.testNonExhaustiveEnum__enum_509'
// tmp.zig:62:39: note: imported here // tmp.zig:62:39: note: imported here
// neg_inf.zon:1:1: error: expected type 'tmp.testUntaggedUnion__union_507' // neg_inf.zon:1:1: error: expected type 'tmp.testUntaggedUnion__union_511'
// tmp.zig:67:44: note: imported here // tmp.zig:67:44: note: imported here
// neg_inf.zon:1:1: error: expected type 'tmp.testTaggedUnionVoid__union_510' // neg_inf.zon:1:1: error: expected type 'tmp.testTaggedUnionVoid__union_514'
// tmp.zig:72:50: note: imported here // tmp.zig:72:50: note: imported here

View File

@ -15,6 +15,6 @@ pub export fn entry() void {
// error // error
// //
// :7:25: error: unable to resolve comptime value // :7:25: error: unable to resolve comptime value
// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_479.C' must be comptime-known // :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_483.C' must be comptime-known
// :4:16: note: struct requires comptime because of this field // :4:16: note: struct requires comptime because of this field
// :4:16: note: types are not available at runtime // :4:16: note: types are not available at runtime

View File

@ -16,5 +16,5 @@ pub export fn entry2() void {
// //
// :3:6: error: no field or member function named 'copy' in '[]const u8' // :3:6: error: no field or member function named 'copy' in '[]const u8'
// :9:8: error: no field or member function named 'bar' in '@TypeOf(.{})' // :9:8: error: no field or member function named 'bar' in '@TypeOf(.{})'
// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_483' // :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_487'
// :12:6: note: struct declared here // :12:6: note: struct declared here

View File

@ -6,6 +6,6 @@ export fn foo() void {
// error // error
// //
// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_472' // :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_476'
// :3:16: note: struct declared here // :3:16: note: struct declared here
// :1:11: note: struct declared here // :1:11: note: struct declared here

View File

@ -44,9 +44,9 @@ comptime {
// //
// :5:23: error: expected error union type, found 'comptime_int' // :5:23: error: expected error union type, found 'comptime_int'
// :10:23: error: expected error union type, found '@TypeOf(.{})' // :10:23: error: expected error union type, found '@TypeOf(.{})'
// :15:23: error: expected error union type, found 'tmp.test2__struct_509' // :15:23: error: expected error union type, found 'tmp.test2__struct_513'
// :15:23: note: struct declared here // :15:23: note: struct declared here
// :20:27: error: expected error union type, found 'tmp.test3__struct_511' // :20:27: error: expected error union type, found 'tmp.test3__struct_515'
// :20:27: note: struct declared here // :20:27: note: struct declared here
// :25:23: error: expected error union type, found 'struct { comptime *const [5:0]u8 = "hello" }' // :25:23: error: expected error union type, found 'struct { comptime *const [5:0]u8 = "hello" }'
// :31:13: error: expected error union type, found 'u32' // :31:13: error: expected error union type, found 'u32'