From 1a0a9d7d59b78e5d97142d867bece78fc477e9a7 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Wed, 10 Sep 2025 13:49:49 -0400 Subject: [PATCH 1/3] x86_64: fix strictness edge cases in `+|` Closes #25145 --- src/arch/x86_64/CodeGen.zig | 61 +++++++-------- test/behavior/vector.zig | 152 ++++++++++++++++++++++++++++++++++-- 2 files changed, 174 insertions(+), 39 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 1d3792d785..d5432ca856 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -7263,10 +7263,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .vp_d, .add, .dst0x, .src0x, .src1x, ._ }, .{ ._, .vp_d, .sra, .tmp2x, .src0x, .ui(31), ._ }, - .{ ._, .vp_d, .cmpgt, .tmp3x, .dst0x, .src0x, ._ }, + .{ ._, .vp_d, .cmpgt, .tmp3x, .src0x, .dst0x, ._ }, .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .lea(.tmp0x), ._ }, .{ ._, .vp_, .xor, .tmp3x, .tmp3x, .src1x, ._ }, - .{ ._, .v_ps, .blendv, .dst0x, .tmp2x, .dst0x, .tmp3x }, + .{ ._, .v_ps, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x }, } }, }, .{ .required_features = .{ .sse4_1, null, null, null }, @@ -7332,10 +7332,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, .{ ._, .vp_d, .add, .dst0y, .src0y, .src1y, ._ }, .{ ._, .vp_d, .sra, .tmp2y, .src0y, .ui(31), ._ }, - .{ ._, .vp_d, .cmpgt, .tmp3y, .dst0y, .src0y, ._ }, + .{ ._, .vp_d, .cmpgt, .tmp3y, .src0y, .dst0y, ._ }, .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .lea(.tmp0y), ._ }, .{ ._, .vp_, .xor, .tmp3y, .tmp3y, .src1y, ._ }, - .{ ._, .v_ps, .blendv, .dst0y, .tmp2y, .dst0y, .tmp3y }, + .{ ._, .v_ps, .blendv, .dst0y, .dst0y, .tmp2y, .tmp3y }, } }, }, .{ .required_features = .{ .avx, null, null, null }, @@ -7615,10 +7615,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ }, .{ ._, .vp_q, .add, 
.dst0x, .src0x, .src1x, ._ }, .{ ._, .vp_q, .cmpgt, .tmp2x, .tmp2x, .src0x, ._ }, - .{ ._, .vp_q, .cmpgt, .tmp3x, .dst0x, .src0x, ._ }, + .{ ._, .vp_q, .cmpgt, .tmp3x, .src0x, .dst0x, ._ }, .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .lea(.tmp0x), ._ }, .{ ._, .vp_, .xor, .tmp3x, .tmp3x, .src1x, ._ }, - .{ ._, .v_pd, .blendv, .dst0x, .tmp2x, .dst0x, .tmp3x }, + .{ ._, .v_pd, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x }, } }, }, .{ .required_features = .{ .sse4_2, null, null, null }, @@ -7685,10 +7685,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .tmp2y, ._ }, .{ ._, .vp_q, .add, .dst0y, .src0y, .src1y, ._ }, .{ ._, .vp_q, .cmpgt, .tmp2y, .tmp2y, .src0y, ._ }, - .{ ._, .vp_q, .cmpgt, .tmp3y, .dst0y, .src0y, ._ }, + .{ ._, .vp_q, .cmpgt, .tmp3y, .src0y, .dst0y, ._ }, .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .lea(.tmp0y), ._ }, .{ ._, .vp_, .xor, .tmp3y, .tmp3y, .src1y, ._ }, - .{ ._, .v_pd, .blendv, .dst0y, .tmp2y, .dst0y, .tmp3y }, + .{ ._, .v_pd, .blendv, .dst0y, .dst0y, .tmp2y, .tmp3y }, } }, }, .{ .required_features = .{ .avx2, null, null, null }, @@ -7724,8 +7724,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x }, .{ ._, .vp_q, .cmpeq, .tmp3x, .tmp3x, .tmp3x, ._ }, .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp3x, ._ }, - .{ ._, .vp_q, .cmpgt, .tmp3x, .dst0x, .tmp2x, ._ }, - .{ ._, .vp_b, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x }, + .{ ._, .vp_q, .cmpgt, .tmp3x, .tmp2x, .dst0x, ._ }, + .{ ._, .vp_b, .blendv, .dst0x, .tmp2x, .dst0x, .tmp3x }, } }, }, .{ .required_features = .{ .avx, null, null, null }, @@ -7761,8 +7761,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .blendv, .dst0x, .dst0x, .tmp2x, .tmp3x }, .{ ._, .vp_q, .cmpeq, .tmp3x, .tmp3x, .tmp3x, ._ }, .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp3x, ._ }, - .{ ._, .vp_q, .cmpgt, .tmp3x, .dst0x, .tmp2x, ._ }, - .{ ._, .vp_b, .blendv, .dst0x, 
.dst0x, .tmp2x, .tmp3x }, + .{ ._, .vp_q, .cmpgt, .tmp3x, .tmp2x, .dst0x, ._ }, + .{ ._, .vp_b, .blendv, .dst0x, .tmp2x, .dst0x, .tmp3x }, } }, }, .{ .required_features = .{ .sse4_2, null, null, null }, @@ -7837,8 +7837,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .blendv, .dst0y, .dst0y, .tmp2y, .tmp3y }, .{ ._, .vp_q, .cmpeq, .tmp3y, .tmp3y, .tmp3y, ._ }, .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .tmp3y, ._ }, - .{ ._, .vp_q, .cmpgt, .tmp3y, .dst0y, .tmp2y, ._ }, - .{ ._, .vp_b, .blendv, .dst0y, .dst0y, .tmp2y, .tmp3y }, + .{ ._, .vp_q, .cmpgt, .tmp3y, .tmp2y, .dst0y, ._ }, + .{ ._, .vp_b, .blendv, .dst0y, .tmp2y, .dst0y, .tmp3y }, } }, }, .{ .required_features = .{ .avx2, null, null, null }, @@ -10714,10 +10714,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .v_dqa, .mov, .tmp4y, .memia(.src1y, .tmp0, .add_unaligned_size), ._, ._ }, .{ ._, .vp_d, .add, .tmp5y, .tmp3y, .tmp4y, ._ }, .{ ._, .vp_d, .sra, .tmp6y, .tmp3y, .ui(31), ._ }, - .{ ._, .vp_d, .cmpgt, .tmp3y, .tmp5y, .tmp3y, ._ }, + .{ ._, .vp_d, .cmpgt, .tmp3y, .tmp3y, .tmp5y, ._ }, .{ ._, .vp_, .xor, .tmp6y, .tmp6y, .tmp2y, ._ }, .{ ._, .vp_, .xor, .tmp3y, .tmp3y, .tmp4y, ._ }, - .{ ._, .v_ps, .blendv, .tmp3y, .tmp6y, .tmp5y, .tmp3y }, + .{ ._, .v_ps, .blendv, .tmp3y, .tmp5y, .tmp6y, .tmp3y }, .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp3y, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, @@ -10755,10 +10755,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .v_dqa, .mov, .tmp4x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, .{ ._, .vp_d, .add, .tmp5x, .tmp3x, .tmp4x, ._ }, .{ ._, .vp_d, .sra, .tmp6x, .tmp3x, .ui(31), ._ }, - .{ ._, .vp_d, .cmpgt, .tmp3x, .tmp5x, .tmp3x, ._ }, + .{ ._, .vp_d, .cmpgt, .tmp3x, .tmp3x, .tmp5x, ._ }, .{ ._, .vp_, .xor, .tmp6x, .tmp6x, .tmp2x, ._ }, .{ ._, .vp_, .xor, .tmp3x, .tmp3x, .tmp4x, 
._ }, - .{ ._, .v_ps, .blendv, .tmp3x, .tmp6x, .tmp5x, .tmp3x }, + .{ ._, .v_ps, .blendv, .tmp3x, .tmp5x, .tmp6x, .tmp3x }, .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp3x, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, @@ -11543,10 +11543,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_, .xor, .tmp5y, .tmp5y, .tmp5y, ._ }, .{ ._, .vp_q, .add, .tmp6y, .tmp3y, .tmp4y, ._ }, .{ ._, .vp_q, .cmpgt, .tmp5y, .tmp5y, .tmp3y, ._ }, - .{ ._, .vp_q, .cmpgt, .tmp3y, .tmp6y, .tmp3y, ._ }, + .{ ._, .vp_q, .cmpgt, .tmp3y, .tmp3y, .tmp6y, ._ }, .{ ._, .vp_, .xor, .tmp5y, .tmp5y, .tmp2y, ._ }, .{ ._, .vp_, .xor, .tmp3y, .tmp3y, .tmp4y, ._ }, - .{ ._, .v_pd, .blendv, .tmp5y, .tmp5y, .tmp6y, .tmp3y }, + .{ ._, .v_pd, .blendv, .tmp5y, .tmp6y, .tmp5y, .tmp3y }, .{ ._, .v_dqa, .mov, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp5y, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, @@ -11585,10 +11585,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_, .xor, .tmp5x, .tmp5x, .tmp5x, ._ }, .{ ._, .vp_q, .add, .tmp6x, .tmp3x, .tmp4x, ._ }, .{ ._, .vp_q, .cmpgt, .tmp5x, .tmp5x, .tmp3x, ._ }, - .{ ._, .vp_q, .cmpgt, .tmp3x, .tmp6x, .tmp3x, ._ }, + .{ ._, .vp_q, .cmpgt, .tmp3x, .tmp3x, .tmp6x, ._ }, .{ ._, .vp_, .xor, .tmp5x, .tmp5x, .tmp2x, ._ }, .{ ._, .vp_, .xor, .tmp3x, .tmp3x, .tmp4x, ._ }, - .{ ._, .v_pd, .blendv, .tmp5x, .tmp5x, .tmp6x, .tmp3x }, + .{ ._, .v_pd, .blendv, .tmp5x, .tmp6x, .tmp5x, .tmp3x }, .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp5x, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, @@ -11607,11 +11607,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .i64, .kind = .{ .smax_mem = .{ .ref = .src0, .vectorize_to = 
.none } } }, .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, - .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, - .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, - .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, - .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_i64, .kind = .{ .reg = .xmm0 } }, + .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_2_i64, .kind = .{ .rc = .sse } }, + .unused, .unused, .unused, .unused, @@ -11628,12 +11628,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._dqa, .mov, .tmp6x, .tmp3x, ._, ._ }, .{ ._, .p_q, .add, .tmp6x, .tmp4x, ._, ._ }, .{ ._, .p_q, .cmpgt, .tmp5x, .tmp3x, ._, ._ }, - .{ ._, ._dqa, .mov, .tmp7x, .tmp6x, ._, ._ }, - .{ ._, .p_q, .cmpgt, .tmp7x, .tmp3x, ._, ._ }, + .{ ._, .p_q, .cmpgt, .tmp3x, .tmp6x, ._, ._ }, .{ ._, .p_, .xor, .tmp5x, .tmp2x, ._, ._ }, - .{ ._, .p_, .xor, .tmp7x, .tmp4x, ._, ._ }, - .{ ._, ._pd, .blendv, .tmp5x, .tmp6x, .tmp7x, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp5x, ._, ._ }, + .{ ._, .p_, .xor, .tmp3x, .tmp4x, ._, ._ }, + .{ ._, ._pd, .blendv, .tmp6x, .tmp5x, .tmp3x, ._ }, + .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp6x, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 4de7c42d49..bf430f41bb 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -970,6 +970,73 @@ test "saturating add" { const expected = i8x3{ 127, 127, 127 }; try expect(mem.eql(i8, &@as([3]i8, expected), &@as([3]i8, result))); } + try testElemType(i4); + try testElemType(u4); + try testElemType(i8); + try testElemType(u8); + try testElemType(i12); + try testElemType(u12); + try testElemType(i16); + try testElemType(u16); + try testElemType(i24); + try testElemType(u24); + try 
testElemType(i32); + try testElemType(u32); + try testElemType(i48); + try testElemType(u48); + try testElemType(i64); + try testElemType(u64); + } + fn testElemType(comptime Elem: type) !void { + const min = std.math.minInt(Elem); + const max = std.math.maxInt(Elem); + + var v: @Vector(4, Elem) = .{ 0, 1, 0, 1 }; + v +|= .{ 0, 0, 1, 1 }; + try expect(v[0] == 0); + try expect(v[1] == 1); + try expect(v[2] == 1); + try expect(v[3] == 2); + + v = .{ 0, max, 1, max }; + v +|= .{ max, 0, max, 1 }; + try expect(v[0] == max); + try expect(v[1] == max); + try expect(v[2] == max); + try expect(v[3] == max); + + v = .{ 1, max - 1, max / 2, max }; + v +|= .{ max - 1, 1, max / 2, max }; + try expect(v[0] == max); + try expect(v[1] == max); + try expect(v[2] == max - 1); + try expect(v[3] == max); + + switch (@typeInfo(Elem).int.signedness) { + .signed => { + v = .{ -1, -1, 0, -1 }; + v +|= .{ 1, 0, -1, -1 }; + try expect(v[0] == 0); + try expect(v[1] == -1); + try expect(v[2] == -1); + try expect(v[3] == -2); + + v = .{ 0, min, -1, min }; + v +|= .{ min, 0, min, -1 }; + try expect(v[0] == min); + try expect(v[1] == min); + try expect(v[2] == min); + try expect(v[3] == min); + + v = .{ -1, min + 1, min / 2, min }; + v +|= .{ min + 1, -1, min / 2, min }; + try expect(v[0] == min); + try expect(v[1] == min); + try expect(v[2] == min); + try expect(v[3] == min); + }, + .unsigned => {}, + } } }; try S.doTheTest(); @@ -986,14 +1053,83 @@ test "saturating subtraction" { const S = struct { fn doTheTest() !void { - // Broken out to avoid https://github.com/ziglang/zig/issues/11251 - const u8x3 = @Vector(3, u8); - var lhs = u8x3{ 0, 0, 0 }; - var rhs = u8x3{ 255, 255, 255 }; - _ = .{ &lhs, &rhs }; - const result = lhs -| rhs; - const expected = u8x3{ 0, 0, 0 }; - try expect(mem.eql(u8, &@as([3]u8, expected), &@as([3]u8, result))); + { + // Broken out to avoid https://github.com/ziglang/zig/issues/11251 + const u8x3 = @Vector(3, u8); + var lhs = u8x3{ 0, 0, 0 }; + var rhs = u8x3{ 255, 
255, 255 }; + _ = .{ &lhs, &rhs }; + const result = lhs -| rhs; + const expected = u8x3{ 0, 0, 0 }; + try expect(mem.eql(u8, &@as([3]u8, expected), &@as([3]u8, result))); + } + try testElemType(i4); + try testElemType(u4); + try testElemType(i8); + try testElemType(u8); + try testElemType(i12); + try testElemType(u12); + try testElemType(i16); + try testElemType(u16); + try testElemType(i24); + try testElemType(u24); + try testElemType(i32); + try testElemType(u32); + try testElemType(i48); + try testElemType(u48); + try testElemType(i64); + try testElemType(u64); + } + fn testElemType(comptime Elem: type) !void { + const min = std.math.minInt(Elem); + const max = std.math.maxInt(Elem); + + var v: @Vector(4, Elem) = .{ 0, 1, 0, 1 }; + v -|= .{ 0, 0, 1, 1 }; + try expect(v[0] == 0); + try expect(v[1] == 1); + try expect(v[2] == @max(min, -1)); + try expect(v[3] == 0); + + v = .{ 0, max, 1, max }; + v -|= .{ max, 0, max, 1 }; + try expect(v[0] == @min(min + 1, 0)); + try expect(v[1] == max); + try expect(v[2] == @min(min + 2, 0)); + try expect(v[3] == max - 1); + + v = .{ 1, max - 1, max / 2, max }; + v -|= .{ max - 1, 1, max / 2, max }; + try expect(v[0] == @min(min + 3, 0)); + try expect(v[1] == max - 2); + try expect(v[2] == 0); + try expect(v[3] == 0); + + switch (@typeInfo(Elem).int.signedness) { + .signed => { + v = .{ -1, -1, 0, -1 }; + v -|= .{ -1, 0, 1, 1 }; + try expect(v[0] == 0); + try expect(v[1] == -1); + try expect(v[2] == -1); + try expect(v[3] == -2); + + v = .{ 0, min, -1, min }; + v -|= .{ max, 0, max, 1 }; + try expect(v[0] == min + 1); + try expect(v[1] == min); + try expect(v[2] == min); + try expect(v[3] == min); + + v = .{ -1, min + 1, min / 2, min }; + v -|= .{ max, 1, max / 2, max }; + try expect(v[0] == min); + try expect(v[1] == min); + try expect(v[2] == min + 1); + try expect(v[3] == min); + }, + .unsigned => {}, + } } }; try S.doTheTest(); From e313b387a039fe4471462db0a63eea9b29c84a96 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: 
Wed, 10 Sep 2025 22:25:55 -0400 Subject: [PATCH 2/3] x86_64: delete usages of avx2 `vpack?s??` This instruction actually has fairly useless semantics, and even the cases that were semantically correct could save 1 cycle of latency by using a different sequence involving the avx version instead. Closes #25174 --- src/arch/x86_64/CodeGen.zig | 156 +++++++++++++++++++------ test/behavior/cast.zig | 222 +++++++++++++++++++++++++++++++++--- 2 files changed, 323 insertions(+), 55 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d5432ca856..83c5129943 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -22283,8 +22283,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_w, .movsxb, .dst0y, .src0x, ._, ._ }, .{ ._, .vp_w, .movsxb, .tmp0y, .src1x, ._, ._ }, .{ ._, .vp_w, .mull, .dst0y, .dst0y, .tmp0y, ._ }, - .{ ._, .vp_b, .ackssw, .dst0y, .dst0y, .dst0y, ._ }, - .{ ._, .v_q, .perm, .dst0y, .dst0y, .ui(0b10_00_10_00), ._ }, + .{ ._, .v_i128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .ackssw, .dst0x, .dst0x, .tmp0x, ._ }, } }, }, .{ .required_features = .{ .avx, null, null, null }, @@ -22414,8 +22414,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_w, .movzxb, .dst0y, .src0x, ._, ._ }, .{ ._, .vp_w, .movzxb, .tmp0y, .src1x, ._, ._ }, .{ ._, .vp_w, .mull, .dst0y, .dst0y, .tmp0y, ._ }, - .{ ._, .vp_b, .ackusw, .dst0y, .dst0y, .dst0y, ._ }, - .{ ._, .v_q, .perm, .dst0y, .dst0y, .ui(0b10_00_10_00), ._ }, + .{ ._, .v_i128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .ackusw, .dst0x, .dst0x, .tmp0x, ._ }, } }, }, .{ .required_features = .{ .avx2, null, null, null }, @@ -22447,8 +22447,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .@"0:", .vp_w, .movsxb, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, .{ ._, .vp_w, .movsxb, .tmp2y, .memia(.src1x, .tmp0, 
.add_unaligned_size), ._, ._ }, .{ ._, .vp_w, .mull, .tmp1y, .tmp1y, .tmp2y, ._ }, - .{ ._, .vp_b, .ackssw, .tmp1y, .tmp1y, .tmp1y, ._ }, - .{ ._, .v_q, .perm, .tmp1y, .tmp1y, .ui(0b10_00_10_00), ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .tmp1y, .ui(1), ._ }, + .{ ._, .vp_b, .ackssw, .tmp1x, .tmp1x, .tmp2x, ._ }, .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, @@ -22659,8 +22659,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .@"0:", .vp_w, .movzxb, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, .{ ._, .vp_w, .movzxb, .tmp2y, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, .{ ._, .vp_w, .mull, .tmp1y, .tmp1y, .tmp2y, ._ }, - .{ ._, .vp_b, .ackusw, .tmp1y, .tmp1y, .tmp1y, ._ }, - .{ ._, .v_q, .perm, .tmp1y, .tmp1y, .ui(0b10_00_10_00), ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .tmp1y, .ui(1), ._ }, + .{ ._, .vp_b, .ackusw, .tmp1x, .tmp1x, .tmp2x, ._ }, .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, @@ -82559,7 +82559,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, - .unused, + .{ .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -82576,8 +82576,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, - .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp4x, .tmp3y, .ui(1), ._ }, + .{ ._, .vp_b, .ackssw, 
.tmp3x, .tmp3x, .tmp4x, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.tmp1, 2), ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, @@ -82588,8 +82589,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, - .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp4x, .tmp3y, .ui(1), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp4x, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.tmp1, 2), ._, ._ }, @@ -90324,7 +90326,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .byte } }, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .qword, .is = .byte } }, .any }, .patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, @@ -90346,7 +90348,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .byte } }, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .qword, .is = .byte } }, .any }, .patterns = &.{ .{ .src = .{ .to_mut_sse, .none, .none } }, }, @@ -90361,20 +90363,46 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { 
.patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ - .{ ._, .vp_b, .ackssw, .dst0y, .src0y, .dst0y, ._ }, + .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_b, .ackssw, .dst0x, .src0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .byte } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_b, .ackssw, .dst0x, .src0x, .dst0x, ._ }, } }, }, .{ .required_features = .{ .avx2, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .byte } }, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any }, .patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ - .{ ._, .vp_b, .ackusw, .dst0y, .src0y, .dst0y, ._ }, + .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_b, .ackusw, .dst0x, .src0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .byte } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ 
}, + .{ ._, .vp_b, .ackusw, .dst0x, .src0x, .dst0x, ._ }, } }, }, .{ .required_features = .{ .slow_incdec, null, null, null }, @@ -90448,7 +90476,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .byte } }, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .dword, .is = .byte } }, .any }, .patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, @@ -90472,7 +90500,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .sse4_1, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .byte } }, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .dword, .is = .byte } }, .any }, .patterns = &.{ .{ .src = .{ .to_mut_sse, .none, .none } }, }, @@ -90488,22 +90516,50 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ - .{ ._, .vp_w, .ackssd, .dst0y, .src0y, .dst0y, ._ }, - .{ ._, .vp_b, .ackssw, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .ackssd, .dst0x, .src0x, .dst0x, ._ }, + .{ ._, .vp_b, .ackssw, .dst0x, .dst0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_signed_int = .{ .of = .qword, .is = .byte } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ 
.rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .ackssd, .dst0x, .src0x, .dst0x, ._ }, + .{ ._, .vp_b, .ackssw, .dst0x, .dst0x, .dst0x, ._ }, } }, }, .{ .required_features = .{ .avx2, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .byte } }, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .qword, .is = .byte } }, .any }, .patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ - .{ ._, .vp_w, .ackusd, .dst0y, .src0y, .dst0y, ._ }, - .{ ._, .vp_b, .ackusw, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .ackusd, .dst0x, .src0x, .dst0x, ._ }, + .{ ._, .vp_b, .ackusw, .dst0x, .dst0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .qword, .is = .byte } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .ackusd, .dst0x, .src0x, .dst0x, ._ }, + .{ ._, .vp_b, .ackusw, .dst0x, .dst0x, .dst0x, ._ }, } }, }, .{ .required_features = .{ .slow_incdec, null, null, null }, @@ -90722,7 +90778,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .word } }, .any }, + .dst_constraints = .{ .{ 
.scalar_unsigned_int = .{ .of = .qword, .is = .word } }, .any }, .patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, @@ -90744,7 +90800,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .{ .required_features = .{ .sse4_1, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .word } }, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .qword, .is = .word } }, .any }, .patterns = &.{ .{ .src = .{ .to_mut_sse, .none, .none } }, }, @@ -90759,20 +90815,46 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ - .{ ._, .vp_w, .ackssd, .dst0y, .src0y, .dst0y, ._ }, + .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .ackssd, .dst0x, .src0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_signed_int = .{ .of = .xword, .is = .word } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .ackssd, .dst0x, .src0x, .dst0x, ._ }, } }, }, .{ .required_features = .{ .avx2, null, null, null }, .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any }, .patterns = &.{ .{ .src = .{ .to_sse, .none, .none } }, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = 
.src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, .each = .{ .once = &.{ - .{ ._, .vp_w, .ackusd, .dst0y, .src0y, .dst0y, ._ }, + .{ ._, .v_i128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .ackusd, .dst0x, .src0x, .dst0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_unsigned_int = .{ .of = .xword, .is = .word } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .dst0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .ackusd, .dst0x, .src0x, .dst0x, ._ }, } }, }, .{ .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any }, @@ -92413,7 +92495,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .vp_d, .movzxb, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .vp_d, .movzxb, .tmp1y, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._ }, .{ ._, .v_dqa, .mov, .memsia(.dst0y, .@"4", .tmp0, .add_unaligned_size), .tmp1y, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 700bfb0991..a04e94451e 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -607,26 +607,212 @@ test "@intCast on vector" { const S = struct { fn doTheTest() !void { - // Upcast (implicit, equivalent to @intCast) - var up0: @Vector(2, u8) = [_]u8{ 0x55, 0xaa }; - _ = &up0; - const up1: @Vector(2, u16) = up0; - const up2: @Vector(2, u32) = up0; - const up3: @Vector(2, u64) = up0; - // Downcast (safety-checked) - var down0 = up3; - _ = &down0; - const down1: 
@Vector(2, u32) = @intCast(down0); - const down2: @Vector(2, u16) = @intCast(down0); - const down3: @Vector(2, u8) = @intCast(down0); + { + // Upcast (implicit, equivalent to @intCast) + var up0: @Vector(2, u8) = .{ 0x55, 0xaa }; + _ = &up0; + const up1: @Vector(2, u16) = up0; + const up2: @Vector(2, u32) = up0; + const up3: @Vector(2, u64) = up0; - try expect(mem.eql(u16, &@as([2]u16, up1), &[2]u16{ 0x55, 0xaa })); - try expect(mem.eql(u32, &@as([2]u32, up2), &[2]u32{ 0x55, 0xaa })); - try expect(mem.eql(u64, &@as([2]u64, up3), &[2]u64{ 0x55, 0xaa })); + try expect(mem.eql(u16, &@as([2]u16, up1), &[2]u16{ 0x55, 0xaa })); + try expect(mem.eql(u32, &@as([2]u32, up2), &[2]u32{ 0x55, 0xaa })); + try expect(mem.eql(u64, &@as([2]u64, up3), &[2]u64{ 0x55, 0xaa })); - try expect(mem.eql(u32, &@as([2]u32, down1), &[2]u32{ 0x55, 0xaa })); - try expect(mem.eql(u16, &@as([2]u16, down2), &[2]u16{ 0x55, 0xaa })); - try expect(mem.eql(u8, &@as([2]u8, down3), &[2]u8{ 0x55, 0xaa })); + { + // Downcast (safety-checked) + const down2: @Vector(2, u32) = @intCast(up3); + const down1: @Vector(2, u16) = @intCast(up3); + const down0: @Vector(2, u8) = @intCast(up3); + + try expect(mem.eql(u32, &@as([2]u32, down2), &[2]u32{ 0x55, 0xaa })); + try expect(mem.eql(u16, &@as([2]u16, down1), &[2]u16{ 0x55, 0xaa })); + try expect(mem.eql(u8, &@as([2]u8, down0), &[2]u8{ 0x55, 0xaa })); + } + + { + // Downcast (safety-checked) + const down1: @Vector(2, u16) = @intCast(up2); + const down0: @Vector(2, u8) = @intCast(up2); + + try expect(mem.eql(u16, &@as([2]u16, down1), &[2]u16{ 0x55, 0xaa })); + try expect(mem.eql(u8, &@as([2]u8, down0), &[2]u8{ 0x55, 0xaa })); + } + + { + // Downcast (safety-checked) + const down0: @Vector(2, u8) = @intCast(up1); + + try expect(mem.eql(u8, &@as([2]u8, down0), &[2]u8{ 0x55, 0xaa })); + } + } + { + // Upcast (implicit, equivalent to @intCast) + var up0: @Vector(4, u8) = .{ 0x00, 0x55, 0xaa, 0xff }; + _ = &up0; + const up1: @Vector(4, u16) = up0; + const up2: 
@Vector(4, u32) = up0; + const up3: @Vector(4, u64) = up0; + + try expect(mem.eql(u16, &@as([4]u16, up1), &[4]u16{ 0x00, 0x55, 0xaa, 0xff })); + try expect(mem.eql(u32, &@as([4]u32, up2), &[4]u32{ 0x00, 0x55, 0xaa, 0xff })); + try expect(mem.eql(u64, &@as([4]u64, up3), &[4]u64{ 0x00, 0x55, 0xaa, 0xff })); + + { + // Downcast (safety-checked) + const down2: @Vector(4, u32) = @intCast(up3); + const down1: @Vector(4, u16) = @intCast(up3); + const down0: @Vector(4, u8) = @intCast(up3); + + try expect(mem.eql(u32, &@as([4]u32, down2), &[4]u32{ 0x00, 0x55, 0xaa, 0xff })); + try expect(mem.eql(u16, &@as([4]u16, down1), &[4]u16{ 0x00, 0x55, 0xaa, 0xff })); + try expect(mem.eql(u8, &@as([4]u8, down0), &[4]u8{ 0x00, 0x55, 0xaa, 0xff })); + } + + { + // Downcast (safety-checked) + const down1: @Vector(4, u16) = @intCast(up2); + const down0: @Vector(4, u8) = @intCast(up2); + + try expect(mem.eql(u16, &@as([4]u16, down1), &[4]u16{ 0x00, 0x55, 0xaa, 0xff })); + try expect(mem.eql(u8, &@as([4]u8, down0), &[4]u8{ 0x00, 0x55, 0xaa, 0xff })); + } + + { + // Downcast (safety-checked) + const down0: @Vector(4, u8) = @intCast(up1); + + try expect(mem.eql(u8, &@as([4]u8, down0), &[4]u8{ 0x00, 0x55, 0xaa, 0xff })); + } + } + { + // Upcast (implicit, equivalent to @intCast) + var up0: @Vector(8, u8) = .{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + }; + _ = &up0; + const up1: @Vector(8, u16) = up0; + const up2: @Vector(8, u32) = up0; + const up3: @Vector(8, u64) = up0; + + try expect(mem.eql(u16, &@as([8]u16, up1), &[8]u16{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + try expect(mem.eql(u32, &@as([8]u32, up2), &[8]u32{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + try expect(mem.eql(u64, &@as([8]u64, up3), &[8]u64{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + + { + // Downcast (safety-checked) + const down2: @Vector(8, u32) = @intCast(up3); + const down1: @Vector(8, u16) = @intCast(up3); + const down0: @Vector(8, u8) = @intCast(up3); + + try 
expect(mem.eql(u32, &@as([8]u32, down2), &[8]u32{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + try expect(mem.eql(u16, &@as([8]u16, down1), &[8]u16{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + try expect(mem.eql(u8, &@as([8]u8, down0), &[8]u8{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + } + + { + // Downcast (safety-checked) + const down1: @Vector(8, u16) = @intCast(up2); + const down0: @Vector(8, u8) = @intCast(up2); + + try expect(mem.eql(u16, &@as([8]u16, down1), &[8]u16{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + try expect(mem.eql(u8, &@as([8]u8, down0), &[8]u8{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + } + + { + // Downcast (safety-checked) + const down0: @Vector(8, u8) = @intCast(up1); + + try expect(mem.eql(u8, &@as([8]u8, down0), &[8]u8{ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + })); + } + } + { + // Upcast (implicit, equivalent to @intCast) + var up0: @Vector(16, u8) = .{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + }; + _ = &up0; + const up1: @Vector(16, u16) = up0; + const up2: @Vector(16, u32) = up0; + const up3: @Vector(16, u64) = up0; + + try expect(mem.eql(u16, &@as([16]u16, up1), &[16]u16{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + try expect(mem.eql(u32, &@as([16]u32, up2), &[16]u32{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + try expect(mem.eql(u64, &@as([16]u64, up3), &[16]u64{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + + { + // Downcast (safety-checked) + const down2: @Vector(16, u32) = @intCast(up3); + const down1: @Vector(16, u16) = @intCast(up3); + const down0: @Vector(16, u8) = @intCast(up3); + + try expect(mem.eql(u32, &@as([16]u32, down2), &[16]u32{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 
0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + try expect(mem.eql(u16, &@as([16]u16, down1), &[16]u16{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + try expect(mem.eql(u8, &@as([16]u8, down0), &[16]u8{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + } + + { + // Downcast (safety-checked) + const down1: @Vector(16, u16) = @intCast(up2); + const down0: @Vector(16, u8) = @intCast(up2); + + try expect(mem.eql(u16, &@as([16]u16, down1), &[16]u16{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + try expect(mem.eql(u8, &@as([16]u8, down0), &[16]u8{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + } + + { + // Downcast (safety-checked) + const down0: @Vector(16, u8) = @intCast(up1); + + try expect(mem.eql(u8, &@as([16]u8, down0), &[16]u8{ + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + })); + } + } } }; From a2ba7dd1c2c3b2be88b9d3c30ff2735c87a3d6d3 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Wed, 10 Sep 2025 23:13:34 -0400 Subject: [PATCH 3/3] x86_64: fix `@splat` typo --- src/arch/x86_64/CodeGen.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 83c5129943..3222aea55e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -165321,7 +165321,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_size), ._, ._ }, - .{ ._, .v_ps, .shuf, .tmp1x, .tmp1x, .src0x, .ui(0b00_00_00_00) }, + .{ ._, .v_ps, .shuf, .tmp1x, .src0x, .src0x, .ui(0b00_00_00_00) }, .{ .@"0:", .v_ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, .{ ._, ._, .sub, .tmp0d, 
.si(16), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ },