x86_64: fix strictness edge cases in +|

Closes #25145
This commit is contained in:
Jacob Young 2025-09-10 13:49:49 -04:00
parent bfda12efcf
commit 1a0a9d7d59
2 changed files with 174 additions and 39 deletions

View File

@ -7263,10 +7263,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .vp_d, .add, .dst0x, .src0x, .src1x, ._ },
.{ ._, .vp_d, .sra, .tmp2x, .src0x, .ui(31), ._ },
.{ ._, .vp_d, .cmpgt, .tmp3x, .dst0x, .src0x, ._ },
.{ ._, .vp_d, .cmpgt, .tmp3x, .src0x, .dst0x, ._ },
.{ ._, .vp_, .xor, .tmp2x, .tmp2x, .lea(.tmp0x), ._ },
.{ ._, .vp_, .xor, .tmp3x, .tmp3x, .src1x, ._ },
.{ ._, .v_ps, .blendv, .dst0x, .tmp2x, .dst0x, .tmp3x },
.{ ._, .v_ps, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x },
} },
}, .{
.required_features = .{ .sse4_1, null, null, null },
@ -7332,10 +7332,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ },
.{ ._, .vp_d, .add, .dst0y, .src0y, .src1y, ._ },
.{ ._, .vp_d, .sra, .tmp2y, .src0y, .ui(31), ._ },
.{ ._, .vp_d, .cmpgt, .tmp3y, .dst0y, .src0y, ._ },
.{ ._, .vp_d, .cmpgt, .tmp3y, .src0y, .dst0y, ._ },
.{ ._, .vp_, .xor, .tmp2y, .tmp2y, .lea(.tmp0y), ._ },
.{ ._, .vp_, .xor, .tmp3y, .tmp3y, .src1y, ._ },
.{ ._, .v_ps, .blendv, .dst0y, .tmp2y, .dst0y, .tmp3y },
.{ ._, .v_ps, .blendv, .dst0y, .dst0y, .tmp2y, .tmp3y },
} },
}, .{
.required_features = .{ .avx, null, null, null },
@ -7615,10 +7615,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ },
.{ ._, .vp_q, .add, .dst0x, .src0x, .src1x, ._ },
.{ ._, .vp_q, .cmpgt, .tmp2x, .tmp2x, .src0x, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3x, .dst0x, .src0x, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3x, .src0x, .dst0x, ._ },
.{ ._, .vp_, .xor, .tmp2x, .tmp2x, .lea(.tmp0x), ._ },
.{ ._, .vp_, .xor, .tmp3x, .tmp3x, .src1x, ._ },
.{ ._, .v_pd, .blendv, .dst0x, .tmp2x, .dst0x, .tmp3x },
.{ ._, .v_pd, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x },
} },
}, .{
.required_features = .{ .sse4_2, null, null, null },
@ -7685,10 +7685,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_, .xor, .tmp2y, .tmp2y, .tmp2y, ._ },
.{ ._, .vp_q, .add, .dst0y, .src0y, .src1y, ._ },
.{ ._, .vp_q, .cmpgt, .tmp2y, .tmp2y, .src0y, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3y, .dst0y, .src0y, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3y, .src0y, .dst0y, ._ },
.{ ._, .vp_, .xor, .tmp2y, .tmp2y, .lea(.tmp0y), ._ },
.{ ._, .vp_, .xor, .tmp3y, .tmp3y, .src1y, ._ },
.{ ._, .v_pd, .blendv, .dst0y, .tmp2y, .dst0y, .tmp3y },
.{ ._, .v_pd, .blendv, .dst0y, .dst0y, .tmp2y, .tmp3y },
} },
}, .{
.required_features = .{ .avx2, null, null, null },
@ -7724,8 +7724,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_b, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x },
.{ ._, .vp_q, .cmpeq, .tmp3x, .tmp3x, .tmp3x, ._ },
.{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp3x, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3x, .dst0x, .tmp2x, ._ },
.{ ._, .vp_b, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x },
.{ ._, .vp_q, .cmpgt, .tmp3x, .tmp2x, .dst0x, ._ },
.{ ._, .vp_b, .blendv, .dst0x, .tmp2x, .dst0x, .tmp3x },
} },
}, .{
.required_features = .{ .avx, null, null, null },
@ -7761,8 +7761,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_b, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x },
.{ ._, .vp_q, .cmpeq, .tmp3x, .tmp3x, .tmp3x, ._ },
.{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp3x, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3x, .dst0x, .tmp2x, ._ },
.{ ._, .vp_b, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x },
.{ ._, .vp_q, .cmpgt, .tmp3x, .tmp2x, .dst0x, ._ },
.{ ._, .vp_b, .blendv, .dst0x, .tmp2x, .dst0x, .tmp3x },
} },
}, .{
.required_features = .{ .sse4_2, null, null, null },
@ -7837,8 +7837,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_b, .blendv, .dst0y, .dst0y, .tmp2y, .tmp3y },
.{ ._, .vp_q, .cmpeq, .tmp3y, .tmp3y, .tmp3y, ._ },
.{ ._, .vp_, .xor, .tmp2y, .tmp2y, .tmp3y, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3y, .dst0y, .tmp2y, ._ },
.{ ._, .vp_b, .blendv, .dst0y, .dst0y, .tmp2y, .tmp3y },
.{ ._, .vp_q, .cmpgt, .tmp3y, .tmp2y, .dst0y, ._ },
.{ ._, .vp_b, .blendv, .dst0y, .tmp2y, .dst0y, .tmp3y },
} },
}, .{
.required_features = .{ .avx2, null, null, null },
@ -10714,10 +10714,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .v_dqa, .mov, .tmp4y, .memia(.src1y, .tmp0, .add_unaligned_size), ._, ._ },
.{ ._, .vp_d, .add, .tmp5y, .tmp3y, .tmp4y, ._ },
.{ ._, .vp_d, .sra, .tmp6y, .tmp3y, .ui(31), ._ },
.{ ._, .vp_d, .cmpgt, .tmp3y, .tmp5y, .tmp3y, ._ },
.{ ._, .vp_d, .cmpgt, .tmp3y, .tmp3y, .tmp5y, ._ },
.{ ._, .vp_, .xor, .tmp6y, .tmp6y, .tmp2y, ._ },
.{ ._, .vp_, .xor, .tmp3y, .tmp3y, .tmp4y, ._ },
.{ ._, .v_ps, .blendv, .tmp3y, .tmp6y, .tmp5y, .tmp3y },
.{ ._, .v_ps, .blendv, .tmp3y, .tmp5y, .tmp6y, .tmp3y },
.{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp3y, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
@ -10755,10 +10755,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .v_dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ },
.{ ._, .vp_d, .add, .tmp5x, .tmp3x, .tmp4x, ._ },
.{ ._, .vp_d, .sra, .tmp6x, .tmp3x, .ui(31), ._ },
.{ ._, .vp_d, .cmpgt, .tmp3x, .tmp5x, .tmp3x, ._ },
.{ ._, .vp_d, .cmpgt, .tmp3x, .tmp3x, .tmp5x, ._ },
.{ ._, .vp_, .xor, .tmp6x, .tmp6x, .tmp2x, ._ },
.{ ._, .vp_, .xor, .tmp3x, .tmp3x, .tmp4x, ._ },
.{ ._, .v_ps, .blendv, .tmp3x, .tmp6x, .tmp5x, .tmp3x },
.{ ._, .v_ps, .blendv, .tmp3x, .tmp5x, .tmp6x, .tmp3x },
.{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
@ -11543,10 +11543,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_, .xor, .tmp5y, .tmp5y, .tmp5y, ._ },
.{ ._, .vp_q, .add, .tmp6y, .tmp3y, .tmp4y, ._ },
.{ ._, .vp_q, .cmpgt, .tmp5y, .tmp5y, .tmp3y, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3y, .tmp6y, .tmp3y, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3y, .tmp3y, .tmp6y, ._ },
.{ ._, .vp_, .xor, .tmp5y, .tmp5y, .tmp2y, ._ },
.{ ._, .vp_, .xor, .tmp3y, .tmp3y, .tmp4y, ._ },
.{ ._, .v_pd, .blendv, .tmp5y, .tmp5y, .tmp6y, .tmp3y },
.{ ._, .v_pd, .blendv, .tmp5y, .tmp6y, .tmp5y, .tmp3y },
.{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp5y, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(32), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
@ -11585,10 +11585,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, .vp_, .xor, .tmp5x, .tmp5x, .tmp5x, ._ },
.{ ._, .vp_q, .add, .tmp6x, .tmp3x, .tmp4x, ._ },
.{ ._, .vp_q, .cmpgt, .tmp5x, .tmp5x, .tmp3x, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3x, .tmp6x, .tmp3x, ._ },
.{ ._, .vp_q, .cmpgt, .tmp3x, .tmp3x, .tmp6x, ._ },
.{ ._, .vp_, .xor, .tmp5x, .tmp5x, .tmp2x, ._ },
.{ ._, .vp_, .xor, .tmp3x, .tmp3x, .tmp4x, ._ },
.{ ._, .v_pd, .blendv, .tmp5x, .tmp5x, .tmp6x, .tmp3x },
.{ ._, .v_pd, .blendv, .tmp5x, .tmp6x, .tmp5x, .tmp3x },
.{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp5x, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
@ -11607,11 +11607,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .i64, .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = .none } } },
.{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
.{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
.{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
.{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
.{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
.{ .type = .vector_2_i64, .kind = .{ .reg = .xmm0 } },
.{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
.{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
.{ .type = .vector_2_i64, .kind = .{ .rc = .sse } },
.unused,
.unused,
.unused,
.unused,
@ -11628,12 +11628,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._dqa, .mov, .tmp6x, .tmp3x, ._, ._ },
.{ ._, .p_q, .add, .tmp6x, .tmp4x, ._, ._ },
.{ ._, .p_q, .cmpgt, .tmp5x, .tmp3x, ._, ._ },
.{ ._, ._dqa, .mov, .tmp7x, .tmp6x, ._, ._ },
.{ ._, .p_q, .cmpgt, .tmp7x, .tmp3x, ._, ._ },
.{ ._, .p_q, .cmpgt, .tmp3x, .tmp6x, ._, ._ },
.{ ._, .p_, .xor, .tmp5x, .tmp2x, ._, ._ },
.{ ._, .p_, .xor, .tmp7x, .tmp4x, ._, ._ },
.{ ._, ._pd, .blendv, .tmp5x, .tmp6x, .tmp7x, ._ },
.{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp5x, ._, ._ },
.{ ._, .p_, .xor, .tmp3x, .tmp4x, ._, ._ },
.{ ._, ._pd, .blendv, .tmp6x, .tmp5x, .tmp3x, ._ },
.{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp6x, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(16), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },

View File

@ -970,6 +970,73 @@ test "saturating add" {
const expected = i8x3{ 127, 127, 127 };
try expect(mem.eql(i8, &@as([3]i8, expected), &@as([3]i8, result)));
}
try testElemType(i4);
try testElemType(u4);
try testElemType(i8);
try testElemType(u8);
try testElemType(i12);
try testElemType(u12);
try testElemType(i16);
try testElemType(u16);
try testElemType(i24);
try testElemType(u24);
try testElemType(i32);
try testElemType(u32);
try testElemType(i48);
try testElemType(u48);
try testElemType(i64);
try testElemType(u64);
}
/// Exercises `+|=` on a four-lane vector of `Elem`.
///
/// Covers sums that stay in range and sums that must clamp to the maximum
/// value of `Elem`; for signed element types it additionally covers the
/// mirrored cases that must clamp to the minimum value.
fn testElemType(comptime Elem: type) !void {
    const lowest = std.math.minInt(Elem);
    const highest = std.math.maxInt(Elem);

    // Small operands: every lane stays in range.
    var acc: @Vector(4, Elem) = .{ 0, 1, 0, 1 };
    acc +|= .{ 0, 0, 1, 1 };
    inline for ([4]Elem{ 0, 1, 1, 2 }, 0..) |want, lane| try expect(acc[lane] == want);

    // One operand is already the maximum: every lane is expected to be the maximum.
    acc = .{ 0, highest, 1, highest };
    acc +|= .{ highest, 0, highest, 1 };
    inline for ([4]Elem{ highest, highest, highest, highest }, 0..) |want, lane| try expect(acc[lane] == want);

    // Sums at or just below the maximum, plus maximum + maximum.
    acc = .{ 1, highest - 1, highest / 2, highest };
    acc +|= .{ highest - 1, 1, highest / 2, highest };
    inline for ([4]Elem{ highest, highest, highest - 1, highest }, 0..) |want, lane| try expect(acc[lane] == want);

    // The remaining cases use negative operands, so they only apply to signed
    // element types.
    if (@typeInfo(Elem).int.signedness == .signed) {
        // Small negative operands: every lane stays in range.
        acc = .{ -1, -1, 0, -1 };
        acc +|= .{ 1, 0, -1, -1 };
        inline for ([4]Elem{ 0, -1, -1, -2 }, 0..) |want, lane| try expect(acc[lane] == want);

        // One operand is already the minimum: every lane is expected to be the minimum.
        acc = .{ 0, lowest, -1, lowest };
        acc +|= .{ lowest, 0, lowest, -1 };
        inline for ([4]Elem{ lowest, lowest, lowest, lowest }, 0..) |want, lane| try expect(acc[lane] == want);

        // Sums that land on the minimum, plus minimum + minimum.
        acc = .{ -1, lowest + 1, lowest / 2, lowest };
        acc +|= .{ lowest + 1, -1, lowest / 2, lowest };
        inline for ([4]Elem{ lowest, lowest, lowest, lowest }, 0..) |want, lane| try expect(acc[lane] == want);
    }
}
};
try S.doTheTest();
@ -986,14 +1053,83 @@ test "saturating subtraction" {
const S = struct {
/// Runs the saturating-subtraction checks: a fixed three-lane `u8` case
/// (`0 -| 255` compared against `0` in every lane), followed by
/// `testElemType` for signed and unsigned element types from 4 to 64 bits.
// NOTE(review): the u8x3 case appears twice in this view, once bare and once
// wrapped in a block; presumably the pre- and post-change forms of the same
// lines. Confirm against the actual file.
fn doTheTest() !void {
// Broken out to avoid https://github.com/ziglang/zig/issues/11251
const u8x3 = @Vector(3, u8);
var lhs = u8x3{ 0, 0, 0 };
var rhs = u8x3{ 255, 255, 255 };
_ = .{ &lhs, &rhs };
const result = lhs -| rhs;
const expected = u8x3{ 0, 0, 0 };
try expect(mem.eql(u8, &@as([3]u8, expected), &@as([3]u8, result)));
{
// Broken out to avoid https://github.com/ziglang/zig/issues/11251
const u8x3 = @Vector(3, u8);
var lhs = u8x3{ 0, 0, 0 };
var rhs = u8x3{ 255, 255, 255 };
_ = .{ &lhs, &rhs };
const result = lhs -| rhs;
const expected = u8x3{ 0, 0, 0 };
try expect(mem.eql(u8, &@as([3]u8, expected), &@as([3]u8, result)));
}
// Repeat the per-element-type checks for power-of-two widths and for widths
// that are not a power of two (4, 12, 24 and 48 bits).
try testElemType(i4);
try testElemType(u4);
try testElemType(i8);
try testElemType(u8);
try testElemType(i12);
try testElemType(u12);
try testElemType(i16);
try testElemType(u16);
try testElemType(i24);
try testElemType(u24);
try testElemType(i32);
try testElemType(u32);
try testElemType(i48);
try testElemType(u48);
try testElemType(i64);
try testElemType(u64);
}
/// Exercises `-|=` on a four-lane vector of `Elem`.
///
/// The first three cases run for every element type; where the unsigned
/// result clamps at zero but the signed result is negative, the expectation
/// is written with `@max`/`@min` so one expression serves both. Signed
/// element types additionally get cases that must clamp to the minimum value.
fn testElemType(comptime Elem: type) !void {
    const lowest = std.math.minInt(Elem);
    const highest = std.math.maxInt(Elem);

    // Small operands; `0 - 1` is -1 when signed and clamps to 0 when unsigned.
    var acc: @Vector(4, Elem) = .{ 0, 1, 0, 1 };
    acc -|= .{ 0, 0, 1, 1 };
    inline for ([4]Elem{ 0, 1, @max(lowest, -1), 0 }, 0..) |want, lane| try expect(acc[lane] == want);

    // Subtracting the maximum from 0 and 1, and small values from the maximum.
    acc = .{ 0, highest, 1, highest };
    acc -|= .{ highest, 0, highest, 1 };
    inline for ([4]Elem{ @min(lowest + 1, 0), highest, @min(lowest + 2, 0), highest - 1 }, 0..) |want, lane| try expect(acc[lane] == want);

    // Operands near the maximum, plus two lanes where both operands are equal.
    acc = .{ 1, highest - 1, highest / 2, highest };
    acc -|= .{ highest - 1, 1, highest / 2, highest };
    inline for ([4]Elem{ @min(lowest + 3, 0), highest - 2, 0, 0 }, 0..) |want, lane| try expect(acc[lane] == want);

    // The remaining cases use negative operands, so they only apply to signed
    // element types.
    if (@typeInfo(Elem).int.signedness == .signed) {
        // Small negative operands: every lane stays in range.
        acc = .{ -1, -1, 0, -1 };
        acc -|= .{ -1, 0, 1, 1 };
        inline for ([4]Elem{ 0, -1, -1, -2 }, 0..) |want, lane| try expect(acc[lane] == want);

        // Results at or one above the minimum, and minimum - 1 clamping to the minimum.
        acc = .{ 0, lowest, -1, lowest };
        acc -|= .{ highest, 0, highest, 1 };
        inline for ([4]Elem{ lowest + 1, lowest, lowest, lowest }, 0..) |want, lane| try expect(acc[lane] == want);

        // Results that land on or next to the minimum, and minimum - maximum.
        acc = .{ -1, lowest + 1, lowest / 2, lowest };
        acc -|= .{ highest, 1, highest / 2, highest };
        inline for ([4]Elem{ lowest, lowest, lowest + 1, lowest }, 0..) |want, lane| try expect(acc[lane] == want);
    }
}
};
try S.doTheTest();