From 1f6f8b0ffe33696ce980955cceacd98171d7ea0c Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Wed, 21 May 2025 12:15:03 -0400 Subject: [PATCH] x86_64: implement integer `@reduce(.Add)` --- lib/std/crypto/Certificate.zig | 32 +- lib/std/http/Client.zig | 13 +- lib/std/http/HeadParser.zig | 33 +- lib/std/zon/parse.zig | 1 - src/arch/x86_64/CodeGen.zig | 3094 ++++++++++++++++++++++++++- test/behavior/array.zig | 1 - test/behavior/cast.zig | 1 - test/behavior/packed-struct.zig | 1 - test/behavior/sizeof_and_typeof.zig | 1 - test/behavior/slice.zig | 4 - test/behavior/struct.zig | 4 - test/behavior/try.zig | 2 - test/behavior/undefined.zig | 1 - test/behavior/union.zig | 2 - test/behavior/vector.zig | 2 - test/behavior/x86_64/unary.zig | 8 + 16 files changed, 3088 insertions(+), 112 deletions(-) diff --git a/lib/std/crypto/Certificate.zig b/lib/std/crypto/Certificate.zig index f4e7900dae..48089a7986 100644 --- a/lib/std/crypto/Certificate.zig +++ b/lib/std/crypto/Certificate.zig @@ -607,7 +607,7 @@ const Date = struct { while (month < date.month) : (month += 1) { const days: u64 = std.time.epoch.getDaysInMonth( date.year, - @as(std.time.epoch.Month, @enumFromInt(month)), + @enumFromInt(month), ); sec += days * std.time.epoch.secs_per_day; } @@ -623,15 +623,13 @@ const Date = struct { }; pub fn parseTimeDigits(text: *const [2]u8, min: u8, max: u8) !u8 { - const result = if (use_vectors) result: { - const nn: @Vector(2, u16) = .{ text[0], text[1] }; - const zero: @Vector(2, u16) = .{ '0', '0' }; - const mm: @Vector(2, u16) = .{ 10, 1 }; - break :result @reduce(.Add, (nn -% zero) *% mm); - } else std.fmt.parseInt(u8, text, 10) catch return error.CertificateTimeInvalid; + const nn: @Vector(2, u16) = .{ text[0], text[1] }; + const zero: @Vector(2, u16) = .{ '0', '0' }; + const mm: @Vector(2, u16) = .{ 10, 1 }; + const result = @reduce(.Add, (nn -% zero) *% mm); if (result < min) return error.CertificateTimeInvalid; if (result > max) return error.CertificateTimeInvalid; - 
return @truncate(result); + return @intCast(result); } test parseTimeDigits { @@ -647,14 +645,12 @@ test parseTimeDigits { } pub fn parseYear4(text: *const [4]u8) !u16 { - const result = if (use_vectors) result: { - const nnnn: @Vector(4, u32) = .{ text[0], text[1], text[2], text[3] }; - const zero: @Vector(4, u32) = .{ '0', '0', '0', '0' }; - const mmmm: @Vector(4, u32) = .{ 1000, 100, 10, 1 }; - break :result @reduce(.Add, (nnnn -% zero) *% mmmm); - } else std.fmt.parseInt(u16, text, 10) catch return error.CertificateTimeInvalid; + const nnnn: @Vector(4, u32) = .{ text[0], text[1], text[2], text[3] }; + const zero: @Vector(4, u32) = .{ '0', '0', '0', '0' }; + const mmmm: @Vector(4, u32) = .{ 1000, 100, 10, 1 }; + const result = @reduce(.Add, (nnnn -% zero) *% mmmm); if (result > 9999) return error.CertificateTimeInvalid; - return @truncate(result); + return @intCast(result); } test parseYear4 { @@ -858,7 +854,7 @@ pub const der = struct { pub fn parse(bytes: []const u8, index: u32) Element.ParseError!Element { var i = index; - const identifier = @as(Identifier, @bitCast(bytes[i])); + const identifier: Identifier = @bitCast(bytes[i]); i += 1; const size_byte = bytes[i]; i += 1; @@ -872,7 +868,7 @@ pub const der = struct { }; } - const len_size = @as(u7, @truncate(size_byte)); + const len_size: u7 = @truncate(size_byte); if (len_size > @sizeOf(u32)) { return error.CertificateFieldHasInvalidLength; } @@ -1244,5 +1240,3 @@ pub const rsa = struct { return res; } }; - -const use_vectors = @import("builtin").zig_backend != .stage2_x86_64; diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig index c4826aa096..d36cd10aee 100644 --- a/lib/std/http/Client.zig +++ b/lib/std/http/Client.zig @@ -13,7 +13,6 @@ const net = std.net; const Uri = std.Uri; const Allocator = mem.Allocator; const assert = std.debug.assert; -const use_vectors = builtin.zig_backend != .stage2_x86_64; const Client = @This(); const proto = @import("protocol.zig"); @@ -594,13 +593,10 @@ pub const 
Response = struct { } fn parseInt3(text: *const [3]u8) u10 { - if (use_vectors) { - const nnn: @Vector(3, u8) = text.*; - const zero: @Vector(3, u8) = .{ '0', '0', '0' }; - const mmm: @Vector(3, u10) = .{ 100, 10, 1 }; - return @reduce(.Add, @as(@Vector(3, u10), nnn -% zero) *% mmm); - } - return std.fmt.parseInt(u10, text, 10) catch unreachable; + const nnn: @Vector(3, u8) = text.*; + const zero: @Vector(3, u8) = .{ '0', '0', '0' }; + const mmm: @Vector(3, u10) = .{ 100, 10, 1 }; + return @reduce(.Add, (nnn -% zero) *% mmm); } test parseInt3 { @@ -1796,5 +1792,6 @@ pub fn fetch(client: *Client, options: FetchOptions) !FetchResult { } test { + _ = Response; _ = &initDefaultProxies; } diff --git a/lib/std/http/HeadParser.zig b/lib/std/http/HeadParser.zig index bb49faa14b..7b9ca6d2c5 100644 --- a/lib/std/http/HeadParser.zig +++ b/lib/std/http/HeadParser.zig @@ -109,27 +109,21 @@ pub fn feed(p: *HeadParser, bytes: []const u8) usize { continue; }, else => { + const Vector = @Vector(vector_len, u8); + // const BoolVector = @Vector(vector_len, bool); + const BitVector = @Vector(vector_len, u1); + const SizeVector = @Vector(vector_len, u8); + const chunk = bytes[index..][0..vector_len]; - const matches = if (use_vectors) matches: { - const Vector = @Vector(vector_len, u8); - // const BoolVector = @Vector(vector_len, bool); - const BitVector = @Vector(vector_len, u1); - const SizeVector = @Vector(vector_len, u8); + const v: Vector = chunk.*; + // depends on https://github.com/ziglang/zig/issues/19755 + // const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r'))); + // const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n'))); + const matches_r: BitVector = @select(u1, v == @as(Vector, @splat('\r')), @as(Vector, @splat(1)), @as(Vector, @splat(0))); + const matches_n: BitVector = @select(u1, v == @as(Vector, @splat('\n')), @as(Vector, @splat(1)), @as(Vector, @splat(0))); + const matches_or: SizeVector = matches_r | matches_n; - const v: Vector = 
chunk.*; - const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r'))); - const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n'))); - const matches_or: SizeVector = matches_r | matches_n; - - break :matches @reduce(.Add, matches_or); - } else matches: { - var matches: u8 = 0; - for (chunk) |byte| switch (byte) { - '\r', '\n' => matches += 1, - else => {}, - }; - break :matches matches; - }; + const matches = @reduce(.Add, matches_or); switch (matches) { 0 => {}, 1 => switch (chunk[vector_len - 1]) { @@ -357,7 +351,6 @@ inline fn intShift(comptime T: type, x: anytype) T { const HeadParser = @This(); const std = @import("std"); -const use_vectors = builtin.zig_backend != .stage2_x86_64; const builtin = @import("builtin"); test feed { diff --git a/lib/std/zon/parse.zig b/lib/std/zon/parse.zig index 849aa83f86..08447f08f3 100644 --- a/lib/std/zon/parse.zig +++ b/lib/std/zon/parse.zig @@ -3091,7 +3091,6 @@ test "std.zon free on error" { test "std.zon vector" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/15330 - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/15329 const gpa = std.testing.allocator; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 8af58f1622..b37a62f149 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2389,7 +2389,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - @setEvalBranchQuota(22_000); + @setEvalBranchQuota(22_500); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -4328,10 +4328,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { else => unreachable, .add, .add_optimized => {}, .add_wrap => res[0].wrapInt(cg) catch |err| switch (err) { - error.SelectFailed => return cg.fail("failed to select 
wrap {} {} {}", .{ + error.SelectFailed => return cg.fail("failed to select {s} wrap {} {}", .{ + @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), - ops[0].tracking(cg), - ops[1].tracking(cg), + res[0].tracking(cg), }), else => |e| return e, }, @@ -14777,10 +14777,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { else => unreachable, .sub, .sub_optimized => {}, .sub_wrap => res[0].wrapInt(cg) catch |err| switch (err) { - error.SelectFailed => return cg.fail("failed to select wrap {} {} {}", .{ + error.SelectFailed => return cg.fail("failed to select {s} wrap {} {}", .{ + @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), - ops[0].tracking(cg), - ops[1].tracking(cg), + res[0].tracking(cg), }), else => |e| return e, }, @@ -26979,10 +26979,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { else => |e| return e, }; res[0].wrapInt(cg) catch |err| switch (err) { - error.SelectFailed => return cg.fail("failed to select wrap {} {} {}", .{ + error.SelectFailed => return cg.fail("failed to select {s} wrap {} {}", .{ + @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), - ops[0].tracking(cg), - ops[1].tracking(cg), + res[0].tracking(cg), }), else => |e| return e, }; @@ -58664,11 +58664,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { switch (air_tag) { else => unreachable, .shl => res[0].wrapInt(cg) catch |err| switch (err) { - error.SelectFailed => return cg.fail("failed to select wrap {} {} {} {}", .{ + error.SelectFailed => return cg.fail("failed to select {s} wrap {} {}", .{ + @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), - cg.typeOf(bin_op.rhs).fmt(pt), - ops[0].tracking(cg), - ops[1].tracking(cg), + res[0].tracking(cg), }), else => |e| return e, }, @@ -58841,10 +58840,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { else => |e| return e, }, } - if (ops[1].index != res[0].index) { - try ops[1].die(cg); - ops[1] = res[0]; - } else 
cg.temp_type[@intFromEnum(res[0].unwrap(cg).temp)] = .i32; + if (ops[1].index != res[0].index) try ops[1].die(cg); + ops[1] = res[0]; cg.select(&res, &.{lhs_ty}, &ops, comptime &.{ .{ .required_features = .{ .cmov, null, null, null }, .src_constraints = .{ .{ .signed_int = .byte }, .{ .exact_signed_int = 32 }, .any }, @@ -116833,14 +116830,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .reduce => |air_tag| if (use_old) try cg.airReduce(inst) else fallback: { const reduce = air_datas[@intFromEnum(inst)].reduce; - const ty = cg.typeOfIndex(inst); + const res_ty = cg.typeOfIndex(inst); switch (reduce.operation) { .And, .Or, .Xor => {}, - .Min, .Max, .Add, .Mul => break :fallback try cg.airReduce(inst), + .Min, .Max => break :fallback try cg.airReduce(inst), + .Add => if (cg.floatBits(res_ty)) |_| break :fallback try cg.airReduce(inst), + .Mul => break :fallback try cg.airReduce(inst), } var ops = try cg.tempsFromOperands(inst, .{reduce.operand}); var res: [1]Temp = undefined; - cg.select(&res, &.{ty}, &ops, switch (reduce.operation) { + cg.select(&res, &.{res_ty}, &ops, switch (reduce.operation) { .And, .Or, .Xor => switch (@as(Mir.Inst.Tag, switch (reduce.operation) { else => unreachable, .And => .@"and", @@ -119123,7 +119122,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, - .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, @@ -119322,7 +119321,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, .{ 
._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, - .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, @@ -120063,7 +120062,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, - .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, @@ -120253,7 +120252,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, - .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, @@ -120800,7 +120799,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, - .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, @@ -120981,7 +120980,7 
@@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, - .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b01_01_01_01) }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, @@ -121376,7 +121375,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, .{ ._, .v_f128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, - .{ ._, .v_ps, .shuf, .tmp1x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .movhl, .tmp1x, .dst0x, .dst0x, ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp1x, ._ }, } }, }, .{ @@ -121549,7 +121548,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, - .{ ._, .v_ps, .shuf, .tmp2x, .dst0x, .dst0x, .ui(0b11_10_11_10) }, + .{ ._, .v_ps, .movhl, .tmp2x, .dst0x, .dst0x, ._ }, .{ ._, .v_ps, mir_tag, .dst0x, .dst0x, .tmp2x, ._ }, } }, }, .{ @@ -122278,7 +122277,2998 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, } }, }, - .Min, .Max, .Add, .Mul => unreachable, + .Min, .Max => unreachable, + .Add => comptime &.{ .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .mut_gpr, .none, .none } }, + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ 
+ .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .word, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_w, .srl, .tmp0x, .src0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .src0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .word, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._dqa, .mov, .tmp0x, .src0x, ._, ._ }, + .{ ._, .p_w, .srl, .tmp0x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .dword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, 
+ .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .srl, .tmp0x, .src0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_w, .srl, .tmp0x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .dword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._dqa, .mov, .tmp0x, .src0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp0x, .dst0x, ._, ._ }, + .{ ._, .p_w, .srl, .tmp0x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .srl, .tmp0x, .src0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_w, .srl, .tmp0x, .src0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, 
.{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._dqa, .mov, .tmp0x, .src0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, + .{ ._, .p_b, .add, .tmp0x, .src0x, ._, ._ }, + .{ ._, .p_w, .srl, .dst0x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .qword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_w, .srl, .tmp0x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .qword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = 
.{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp0x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp0x, .dst0x, ._, ._ }, + .{ ._, .p_w, .srl, .tmp0x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_w, .srl, .tmp2x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + 
.extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp2x, .ui(16), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ }, + .{ ._, .p_w, .srl, .tmp2x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_w, .srl, .tmp0x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte 
}, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp0x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp0x, .dst0x, ._, ._ }, + .{ ._, .p_w, .srl, .tmp0x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, 
.dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_w, .srl, .tmp2x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp2x, .ui(16), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ }, + .{ ._, .p_w, .srl, .tmp2x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each 
= .{ .once = &.{ + .{ ._, .v_i128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_w, .srl, .tmp0x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_w, .srl, .tmp0x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = 
.usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_w, .srl, .tmp2x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ 
}, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_w, .srl, .tmp2x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .zword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .vp_b, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, + .{ ._, .v_i128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_w, .srl, .tmp0x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .zword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = 
.vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .lea(.tmp0y), ._ }, + .{ ._, .vp_, .@"and", .tmp2y, .tmp2y, .lead(.tmp0y, 32), ._ }, + .{ ._, .vp_b, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_w, .srl, .tmp2x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .vp_b, .add, 
.dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .srl, .tmp1x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_w, .srl, .tmp1x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .vp_b, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .srl, .tmp1x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_w, .srl, .tmp1x, .dst0x, .ui(8), ._ }, + .{ ._, 
.vp_b, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .p_b, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp1x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp1x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp1x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp1x, .ui(16), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp1x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp1x, .dst0x, ._, ._ }, + .{ ._, .p_w, .srl, .tmp1x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .zword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + 
.unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2y, .lead(.tmp0y, 32), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-128, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ }, + .{ .@"0:", .vp_b, .add, .tmp2y, .tmp2y, .memid(.src0y, .tmp0, 32), ._ }, + .{ ._, .vp_b, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(64), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_b, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_w, .srl, .tmp2x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ 
.once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ .@"0:", .vp_b, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_w, .srl, .tmp2x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ 
.@"0:", .vp_b, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_w, .srl, .tmp2x, .dst0x, .ui(8), ._ }, + .{ ._, .vp_b, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", .p_b, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp2x, .ui(16), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, 
.tmp2x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ }, + .{ ._, .p_w, .srl, .tmp2x, .ui(8), ._, ._ }, + .{ ._, .p_b, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null, null, null }, + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-2, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .movzx, .dst0d, .memad(.src0b, .add_unaligned_size, -1), ._, ._ }, + .{ .@"0:", ._, .add, .dst0b, .memi(.src0b, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .byte }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-2, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .movzx, .dst0d, .memad(.src0b, .add_unaligned_size, -1), ._, ._ }, + .{ .@"0:", ._, .add, .dst0b, .memi(.src0b, .tmp0), ._, ._ }, + .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, + .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + 
.dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .mut_gpr, .none, .none } }, + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .srl, .tmp0x, .src0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .src0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .dword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._dqa, .mov, .tmp0x, .src0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, 
.{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp0x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + 
.unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .src0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_w, .add, .tmp0x, .src0x, ._, ._ }, + .{ ._, .p_d, .srl, .dst0x, .ui(16), ._, ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, 
+ }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp0x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp0x, .ui(16), ._, ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0x, .src0x, .lea(.tmp0x), ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, 
.add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp2x, .ui(16), ._, ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_i128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, 
.ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, 
.v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .zword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = 
.vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .vp_w, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, + .{ ._, .v_i128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .srl, .tmp0x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .zword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .lea(.tmp0y), ._ }, + .{ ._, .vp_, .@"and", .tmp2y, .tmp2y, .lead(.tmp0y, 32), ._ }, + .{ ._, .vp_w, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, 
.ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .vp_w, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .srl, .tmp1x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, 
.kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .vp_w, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .srl, .tmp1x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .p_w, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp1x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp1x, .dst0x, 
.ui(0b01_01_01_01), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp1x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp1x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp1x, .ui(16), ._, ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .zword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_16_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2y, .lead(.tmp0y, 32), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-128, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ }, + .{ .@"0:", .vp_w, .add, .tmp2y, .tmp2y, .memid(.src0y, .tmp0, 32), ._ }, + .{ ._, .vp_w, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(64), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_w, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } 
}, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ .@"0:", .vp_w, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } 
}, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .vp_w, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .srl, .tmp2x, .dst0x, .ui(16), ._ }, + .{ ._, .vp_w, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", .p_w, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, 
._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .dst0x, ._, ._ }, + .{ ._, .p_d, .srl, .tmp2x, .ui(16), ._, ._ }, + .{ ._, .p_w, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .word }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .word, .is = .word } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-4, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .movzx, .dst0d, .memad(.src0w, .add_unaligned_size, -2), ._, ._ }, + .{ .@"0:", ._, .add, .dst0w, .memi(.src0w, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .mut_gpr, .none, .none } }, + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once 
= &.{} }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .src0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .qword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, 
.dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = 
.vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_d, .add, .tmp0x, .src0x, ._, ._ }, + .{ ._, .p_d, .shuf, .dst0x, .src0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_i128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ 
._, .vp_d, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + 
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .src0y, .lea(.tmp0y), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .zword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .vp_d, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, + .{ ._, .v_i128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .zword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ 
.pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .lea(.tmp0y), ._ }, + .{ ._, .vp_, .@"and", .tmp2y, .tmp2y, .lead(.tmp0y, 32), ._ }, + .{ ._, .vp_d, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .vp_d, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ 
}, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .vp_d, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, 
._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .p_d, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp1x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp1x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .zword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_8_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2y, .lead(.tmp0y, 32), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-128, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ }, + .{ .@"0:", .vp_d, .add, .tmp2y, .tmp2y, .memid(.src0y, .tmp0, 32), ._ }, + .{ ._, .vp_d, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(64), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, 
.dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ .@"0:", .vp_d, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = 
.vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .vp_d, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .vp_d, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", .p_d, .add, .dst0x, .memi(.src0x, .tmp0), ._, 
._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp2x, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b01_01_01_01), ._ }, + .{ ._, .p_d, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .dword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .dword, .is = .dword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .dst0d, .memad(.src0d, .add_unaligned_size, -4), ._, ._ }, + .{ .@"0:", ._, .add, .dst0d, .memi(.src0d, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .mut_gpr, .none, .none } }, + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_gpr, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .required_features = .{ .avx, null, null, null }, 
+ .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .src0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{ + .{ ._, .p_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_q, .add, .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_i128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, 
.ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_i128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_sse, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + 
.unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_f128, .extract, .tmp0x, .src0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .src0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .src0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .exact_scalar_int = .{ .of = .zword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = &.{ + .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .vp_q, .add, .dst0y, .dst0y, .memd(.src0y, 32), ._ }, + .{ ._, .v_i128, .extract, .tmp0x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp0x, ._ }, + .{ ._, .vp_d, .shuf, .tmp0x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .scalar_int = .{ .of = .zword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .each = .{ .once = 
&.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .mem(.src0y), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2y, .memd(.src0y, 32), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .lea(.tmp0y), ._ }, + .{ ._, .vp_, .@"and", .tmp2y, .tmp2y, .lead(.tmp0y, 32), ._ }, + .{ ._, .vp_q, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .memad(.src0y, .add_unaligned_size, -32), ._, ._ }, + .{ .@"0:", .vp_q, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp1x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp1x, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, 
.none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .vp_q, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp1x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .memad(.src0x, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", .p_q, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_q, .add, .dst0x, .tmp1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx512f, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .zword, .is = .qword } }, .any, .any 
}, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_64_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2y, .lead(.tmp0y, 32), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-128, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .tmp2y, .tmp2y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -64), ._ }, + .{ .@"0:", .vp_q, .add, .tmp2y, .tmp2y, .memid(.src0y, .tmp0, 32), ._ }, + .{ ._, .vp_q, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(64), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_q, .add, .dst0y, .dst0y, .tmp2y, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .yword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ 
+ .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_dqa, .mov, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-64, .src0, .add_size), ._, ._ }, + .{ ._, .vp_, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ .@"0:", .vp_q, .add, .dst0y, .dst0y, .memi(.src0y, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(32), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_i128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_32_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_ps, .mova, .dst0y, .lea(.tmp0y), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-48, .src0, .add_size), ._, ._ }, + .{ ._, .v_ps, .@"and", .dst0y, .dst0y, .memad(.src0y, .add_size, -32), ._ }, + .{ ._, .v_f128, .extract, .tmp2x, .dst0y, .ui(1), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp2x, ._ }, + .{ .@"0:", .vp_q, .add, .dst0x, .dst0x, .memi(.src0x, .tmp0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .vp_q, .add, .dst0x, .dst0x, .tmp2x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + 
.dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_int = .{ .of = .xword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_16_u8, .kind = .{ .pand_mask_mem = .{ .ref = .src0 } } }, + .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .sse }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._dqa, .mov, .dst0x, .lea(.tmp0x), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_size), ._, ._ }, + .{ ._, .p_, .@"and", .dst0x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ .@"0:", .p_q, .add, .dst0x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp2x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, .p_q, .add, .dst0x, .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .{ .unaligned_multiple_scalar_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .dst0q, .memad(.src0q, .add_unaligned_size, -8), ._, ._ }, + .{ .@"0:", ._, .add, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, 
._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .int = .xword }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .mut_gpr, .none, .none } }, + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .dst_constraints = .{ .{ .int = .xword }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_mem, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .int = .yword }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .mut_gpr, .none, .none } }, + .{ .src = .{ .to_mut_sse, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .dst_constraints = .{ .{ .int = .yword }, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_mem, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .dst_constraints = .{ .any_int, .any }, + .src_constraints = .{ .{ .vec_len = 1 }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_mem, .none, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .each = .{ .once = &.{} }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .qword } }, .any }, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .qword, .is = .qword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ 
.type = .u32, .kind = .{ .reg = .rsi } }, + .{ .type = .usize, .kind = .{ .reg = .rdi } }, + .{ .type = .u64, .kind = .{ .reg = .rcx } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sa(.src0, .add_unaligned_size_sub_2_elem_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mema(.src0, .add_unaligned_size_sub_elem_size), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .mem(.dst0), ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_size_div_8), ._, ._ }, + .{ ._, .@"rep _sq", .mov, ._, ._, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1p, .sa(.dst0, .sub_size_div_8), ._, ._ }, + .{ ._, ._, .lea, .tmp2q, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._c, .cl, ._, ._, ._, ._ }, + .{ .@"1:", ._, .mov, .tmp3q, .leasia(.tmp2q, .@"8", .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .adc, .memsia(.dst0q, .@"8", .tmp1, .add_size), .tmp3q, ._, ._ }, + .{ ._, ._c, .in, .tmp1p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .sa(.dst0, .add_size), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + } }, + .Mul => unreachable, }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ @tagName(air_tag), @@ -122287,6 +125277,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }), else => |e| return e, }; + switch (reduce.operation) { + .And, .Or, .Xor, .Min, .Max => {}, + .Add, .Mul => if (cg.intInfo(res_ty)) |_| res[0].wrapInt(cg) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} wrap {} {}", .{ + @tagName(air_tag), + res_ty.fmt(pt), + res[0].tracking(cg), + }), + else => |e| return e, + }, + } try res[0].finish(inst, &.{reduce.operand}, &ops, cg); }, .splat => |air_tag| if (use_old) try cg.airSplat(inst) else fallback: { @@ -142314,10 +145315,10 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) 
!void { ) else try self.asmRegisterRegister( .{ mir_tag[0], .andn }, mask_alias, - if (other_mcv.isRegister()) + registerAlias(if (other_mcv.isRegister()) other_mcv.getReg().? else - try self.copyToTmpRegister(ty, other_mcv), + try self.copyToTmpRegister(ty, other_mcv), abi_size), ); try self.asmRegisterRegister(.{ mir_tag[0], .@"or" }, dst_alias, mask_alias); } @@ -146202,7 +149203,7 @@ const Temp = struct { }, .{ .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = 128 } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, .each = .{ .once = &.{} }, @@ -146210,7 +149211,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .exact_remainder_signed_int = .{ .of = .xword, .is = 64 } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -146236,7 +149237,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .exact_remainder_unsigned_int = .{ .of = .xword, .is = 64 } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, .each = .{ .once = &.{ @@ -146246,7 +149247,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -146275,7 +149276,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .remainder_unsigned_int = 
.{ .of = .xword, .is = .qword } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -146301,7 +149302,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .remainder_signed_int = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -146328,7 +149329,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .remainder_unsigned_int = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -148340,7 +151341,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .xword, .is = .qword } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, @@ -148372,7 +151373,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .qword } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, @@ -148401,7 +151402,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns 
= &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, @@ -148431,7 +151432,7 @@ const Temp = struct { .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ - .{ .src = .{ .mut_mem, .none, .none } }, + .{ .src = .{ .to_mut_mem, .none, .none } }, }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, @@ -151815,12 +154816,12 @@ const Select = struct { const sub_src0_elem_size: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" }; const add_src0_elem_size_mul_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" }; const sub_src0_elem_size_mul_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" }; + const add_dst0_elem_size: Adjust = .{ .sign = .pos, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" }; const add_src1_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .div_8_down, .rhs = .@"4" }; const add_src1_rem_32: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"4" }; const add_src1_rem_64: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"8" }; const add_src1_sub_bit_size: Adjust = .{ .sign = .pos, .lhs = .src1_sub_bit_size, .op = .mul, .rhs = .@"1" }; const add_log2_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .log2_src0_elem_size, .op = .mul, .rhs = .@"1" }; - const add_dst0_elem_size: Adjust = .{ .sign = .pos, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" }; const add_elem_limbs: Adjust = .{ .sign = .pos, .lhs = .elem_limbs, .op = .mul, .rhs = .@"1" }; const add_smin: Adjust = .{ .sign = .pos, .lhs = .smin, .op = .mul, .rhs = .@"1" }; const add_2_smin: Adjust = .{ .sign = .pos, .lhs = .smin, .op = .mul, .rhs = .@"2" }; @@ -152841,7 +155842,10 @@ fn 
select( }, } } - for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, tmp_kind| tmp_kind.finish(dst_temp, cg); + for (case.dst_temps[0..dst_temps.len], dst_temps, dst_tys) |tmp_kind, dst_temp, dst_ty| { + tmp_kind.finish(dst_temp, cg); + cg.temp_type[@intFromEnum(dst_temp.unwrap(cg).temp)] = dst_ty; + } for (tmp_owned, s_tmp_temps) |owned, temp| if (owned) try temp.die(cg); return; } diff --git a/test/behavior/array.zig b/test/behavior/array.zig index a258c49d89..9bfd3c25c2 100644 --- a/test/behavior/array.zig +++ b/test/behavior/array.zig @@ -970,7 +970,6 @@ test "store array of array of structs at comptime" { } test "accessing multidimensional global array at comptime" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 8e99f672ca..b2faf3e292 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -2714,7 +2714,6 @@ test "result type is preserved into comptime block" { } test "bitcast vector" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO diff --git a/test/behavior/packed-struct.zig b/test/behavior/packed-struct.zig index c735b338d0..be4a73be4f 100644 --- a/test/behavior/packed-struct.zig +++ b/test/behavior/packed-struct.zig @@ -1164,7 +1164,6 @@ test "assignment to non-byte-aligned field in packed struct" { } test "packed struct field pointer aligned properly" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) 
return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/sizeof_and_typeof.zig b/test/behavior/sizeof_and_typeof.zig index 099dfd41e6..1869507bb3 100644 --- a/test/behavior/sizeof_and_typeof.zig +++ b/test/behavior/sizeof_and_typeof.zig @@ -326,7 +326,6 @@ test "lazy abi size used in comparison" { } test "peer type resolution with @TypeOf doesn't trigger dependency loop check" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/slice.zig b/test/behavior/slice.zig index 1e62ed27f2..0ec5ebf325 100644 --- a/test/behavior/slice.zig +++ b/test/behavior/slice.zig @@ -994,7 +994,6 @@ test "modify slice length at comptime" { } test "slicing zero length array field of struct" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -1011,7 +1010,6 @@ test "slicing zero length array field of struct" { } test "slicing slices gives correct result" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -1026,7 +1024,6 @@ test "slicing slices gives correct result" { } test "get address of element of zero-sized slice" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return 
error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -1040,7 +1037,6 @@ test "get address of element of zero-sized slice" { } test "sentinel-terminated 0-length slices" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/struct.zig b/test/behavior/struct.zig index 0f99d7ae6b..b62da53d73 100644 --- a/test/behavior/struct.zig +++ b/test/behavior/struct.zig @@ -1835,7 +1835,6 @@ test "tuple with comptime-only field" { } test "extern struct fields are aligned to 1" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO @@ -1905,7 +1904,6 @@ test "array of structs inside struct initialized with undefined" { } test "runtime call in nested initializer" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -1938,7 +1936,6 @@ test "runtime call in nested initializer" { } test "runtime value in nested initializer passed as pointer to function" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO @@ -1963,7 +1960,6 @@ test "runtime value in nested initializer passed as pointer to function" { } test 
"struct field default value is a call" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/try.zig b/test/behavior/try.zig index 18061f24e7..769953d4c7 100644 --- a/test/behavior/try.zig +++ b/test/behavior/try.zig @@ -47,7 +47,6 @@ test "try then not executed with assignment" { } test "`try`ing an if/else expression" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -69,7 +68,6 @@ test "`try`ing an if/else expression" { } test "'return try' of empty error set in function returning non-error" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; diff --git a/test/behavior/undefined.zig b/test/behavior/undefined.zig index 307deb84bf..9dd639c04f 100644 --- a/test/behavior/undefined.zig +++ b/test/behavior/undefined.zig @@ -89,7 +89,6 @@ test "type name of undefined" { var buf: []u8 = undefined; test "reslice of undefined global var slice" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/union.zig b/test/behavior/union.zig index 
c4ac8ac458..e4e4f95347 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -2129,7 +2129,6 @@ test "copied union field doesn't alias source" { } test "create union(enum) from other union(enum)" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -2254,7 +2253,6 @@ test "matching captures causes union equivalence" { } test "signed enum tag with negative value" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 818fbcd7e4..c0aa998472 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -1548,7 +1548,6 @@ test "index into comptime-known vector is comptime-known" { } test "arithmetic on zero-length vectors" { - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -1567,7 +1566,6 @@ test "arithmetic on zero-length vectors" { test "@reduce on bool vector" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO diff --git a/test/behavior/x86_64/unary.zig 
b/test/behavior/x86_64/unary.zig
index 8947ff4936..332bc47be5 100644
--- a/test/behavior/x86_64/unary.zig
+++ b/test/behavior/x86_64/unary.zig
@@ -4881,6 +4881,14 @@ test reduceXor {
     try test_reduce_xor.testIntVectors();
 }
 
+inline fn reduceAdd(comptime Type: type, rhs: Type) @typeInfo(Type).vector.child { // `Type` must be a vector type; the result is one value of its element type.
+    return @reduce(.Add, rhs); // Fold every lane of `rhs` into a single scalar using the `.Add` reduction.
+}
+test reduceAdd {
+    const test_reduce_add = unary(reduceAdd, .{}); // Wrap `reduceAdd` with this file's `unary` helper, passing an empty `.{}` as its second argument.
+    try test_reduce_add.testIntVectors(); // NOTE(review): presumably runs the helper's integer-vector cases (defined outside this hunk) - confirm.
+}
+
 inline fn splat(comptime Type: type, rhs: Type) Type {
     return @splat(rhs[0]);
 }