From af1191ea8ba55a6eab53e0b561355bb116bdbf2d Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 25 Nov 2024 02:58:30 -0500 Subject: [PATCH 01/25] x86_64: rewrite --- src/Air.zig | 36 +- src/Compilation.zig | 1 + src/Liveness.zig | 14 - src/arch/aarch64/CodeGen.zig | 20 +- src/arch/arm/CodeGen.zig | 20 +- src/arch/riscv64/CodeGen.zig | 18 +- src/arch/sparc64/CodeGen.zig | 20 +- src/arch/x86_64/CodeGen.zig | 2689 ++++++++++++++++++++++----------- src/arch/x86_64/abi.zig | 5 +- src/arch/x86_64/bits.zig | 34 +- src/print_air.zig | 24 +- tools/lldb_pretty_printers.py | 2 +- 12 files changed, 1910 insertions(+), 973 deletions(-) diff --git a/src/Air.zig b/src/Air.zig index 4589bb1557..e76ffb3c39 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -893,14 +893,38 @@ pub const Inst = struct { pub const Index = enum(u32) { _, - pub fn toRef(i: Index) Inst.Ref { - assert(@intFromEnum(i) >> 31 == 0); - return @enumFromInt((1 << 31) | @intFromEnum(i)); + pub fn unwrap(index: Index) union(enum) { ref: Inst.Ref, target: u31 } { + const low_index: u31 = @truncate(@intFromEnum(index)); + return switch (@as(u1, @intCast(@intFromEnum(index) >> 31))) { + 0 => .{ .ref = @enumFromInt(@as(u32, 1 << 31) | low_index) }, + 1 => .{ .target = low_index }, + }; } - pub fn toTargetIndex(i: Index) u31 { - assert(@intFromEnum(i) >> 31 == 1); - return @truncate(@intFromEnum(i)); + pub fn toRef(index: Index) Inst.Ref { + return index.unwrap().ref; + } + + pub fn fromTargetIndex(index: u31) Index { + return @enumFromInt((1 << 31) | @as(u32, index)); + } + + pub fn toTargetIndex(index: Index) u31 { + return index.unwrap().target; + } + + pub fn format( + index: Index, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + try writer.writeByte('%'); + switch (index.unwrap()) { + .ref => {}, + .target => try writer.writeByte('t'), + } + try writer.print("{d}", .{@as(u31, @truncate(@intFromEnum(index)))}); } }; diff --git a/src/Compilation.zig b/src/Compilation.zig index 77596e34ec..1260bc028f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -3067,6 +3067,7 @@ pub fn saveState(comp: *Compilation) !void { // linker state switch (lf.tag) { .wasm => { + dev.check(link.File.Tag.wasm.devFeature()); const wasm = lf.cast(.wasm).?; const is_obj = comp.config.output_mode == .Obj; try bufs.ensureUnusedCapacity(85); diff --git a/src/Liveness.zig b/src/Liveness.zig index 709844c0ac..e6ed782a21 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -202,14 +202,6 @@ pub fn operandDies(l: Liveness, inst: Air.Inst.Index, operand: OperandInt) bool return (l.tomb_bits[usize_index] & mask) != 0; } -pub fn clearOperandDeath(l: Liveness, inst: Air.Inst.Index, operand: OperandInt) void { - assert(operand < bpi - 1); - const usize_index = (@intFromEnum(inst) * bpi) / @bitSizeOf(usize); - const mask = @as(usize, 1) << - @as(Log2Int(usize), @intCast((@intFromEnum(inst) % (@bitSizeOf(usize) / bpi)) * bpi + operand)); - l.tomb_bits[usize_index] &= ~mask; -} - const OperandCategory = enum { /// The operand lives on, but this instruction cannot possibly mutate memory. 
none, @@ -844,12 +836,6 @@ const Analysis = struct { special: std.AutoHashMapUnmanaged(Air.Inst.Index, u32), extra: std.ArrayListUnmanaged(u32), - fn storeTombBits(a: *Analysis, inst: Air.Inst.Index, tomb_bits: Bpi) void { - const usize_index = (inst * bpi) / @bitSizeOf(usize); - a.tomb_bits[usize_index] |= @as(usize, tomb_bits) << - @as(Log2Int(usize), @intCast((inst % (@bitSizeOf(usize) / bpi)) * bpi)); - } - fn addExtra(a: *Analysis, extra: anytype) Allocator.Error!u32 { const fields = std.meta.fields(@TypeOf(extra)); try a.extra.ensureUnusedCapacity(a.gpa, fields.len); diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index c1657396ad..64230dfc9e 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -71,6 +71,8 @@ end_di_column: u32, /// which is a relative jump, based on the address following the reloc. exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .empty, +reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined, + /// We postpone the creation of debug info for function args and locals /// until after all Mir instructions have been generated. Only then we /// will know saved_regs_stack_space which is necessary in order to @@ -646,6 +648,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { const old_air_bookkeeping = self.air_bookkeeping; try self.ensureProcessDeathCapacity(Liveness.bpi); + self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off .add => try self.airBinOp(inst, .add), @@ -927,16 +930,13 @@ fn finishAirBookkeeping(self: *Self) void { } fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void { - var tomb_bits = self.liveness.getTombBits(inst); - for (operands) |op| { - const dies = @as(u1, @truncate(tomb_bits)) != 0; - tomb_bits >>= 1; - if (!dies) continue; - const op_index = op.toIndex() orelse continue; - self.processDeath(op_index); + const tomb_bits = self.liveness.getTombBits(inst); + for (0.., operands) |op_index, op| { + if (tomb_bits & @as(Liveness.Bpi, 1) << @intCast(op_index) == 0) continue; + if (self.reused_operands.isSet(op_index)) continue; + self.processDeath(op.toIndexAllowNone() orelse continue); } - const is_used = @as(u1, @truncate(tomb_bits)) == 0; - if (is_used) { + if (tomb_bits & 1 << (Liveness.bpi - 1) == 0) { log.debug("%{d} => {}", .{ inst, result }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(inst, result); @@ -3614,7 +3614,7 @@ fn reuseOperand( } // Prevent the operand deaths processing code from deallocating it. - self.liveness.clearOperandDeath(inst, op_index); + self.reused_operands.set(op_index); // That makes us responsible for doing the rest of the stuff that processDeath would have done. const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 10b0473ecf..fa892117f3 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -72,6 +72,8 @@ end_di_column: u32, /// which is a relative jump, based on the address following the reloc. exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .empty, +reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined, + /// We postpone the creation of debug info for function args and locals /// until after all Mir instructions have been generated. 
Only then we /// will know saved_regs_stack_space which is necessary in order to @@ -635,6 +637,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { const old_air_bookkeeping = self.air_bookkeeping; try self.ensureProcessDeathCapacity(Liveness.bpi); + self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off .add, => try self.airBinOp(inst, .add), @@ -918,16 +921,13 @@ fn finishAirBookkeeping(self: *Self) void { } fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void { - var tomb_bits = self.liveness.getTombBits(inst); - for (operands) |op| { - const dies = @as(u1, @truncate(tomb_bits)) != 0; - tomb_bits >>= 1; - if (!dies) continue; - const op_index = op.toIndex() orelse continue; - self.processDeath(op_index); + const tomb_bits = self.liveness.getTombBits(inst); + for (0.., operands) |op_index, op| { + if (tomb_bits & @as(Liveness.Bpi, 1) << @intCast(op_index) == 0) continue; + if (self.reused_operands.isSet(op_index)) continue; + self.processDeath(op.toIndexAllowNone() orelse continue); } - const is_used = @as(u1, @truncate(tomb_bits)) == 0; - if (is_used) { + if (tomb_bits & 1 << (Liveness.bpi - 1) == 0) { log.debug("%{d} => {}", .{ inst, result }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(inst, result); @@ -2650,7 +2650,7 @@ fn reuseOperand( } // Prevent the operand deaths processing code from deallocating it. - self.liveness.clearOperandDeath(inst, op_index); + self.reused_operands.set(op_index); // That makes us responsible for doing the rest of the stuff that processDeath would have done. const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index c180b3aac9..66f4ce6f0d 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -82,6 +82,8 @@ scope_generation: u32, /// which is a relative jump, based on the address following the reloc. exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .empty, +reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined, + /// Whenever there is a runtime branch, we push a Branch onto this stack, /// and pop it off when the runtime branch joins. This provides an "overlay" /// of the table of mappings from instructions to `MCValue` from within the branch. 
@@ -1443,8 +1445,11 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { verbose_tracking_log.debug("{}", .{func.fmtTracking()}); const old_air_bookkeeping = func.air_bookkeeping; + try func.ensureProcessDeathCapacity(Liveness.bpi); + + func.reused_operands = @TypeOf(func.reused_operands).initEmpty(); try func.inst_tracking.ensureUnusedCapacity(func.gpa, 1); - const tag: Air.Inst.Tag = air_tags[@intFromEnum(inst)]; + const tag = air_tags[@intFromEnum(inst)]; switch (tag) { // zig fmt: off .add, @@ -1783,11 +1788,10 @@ fn finishAir( result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref, ) !void { - var tomb_bits = func.liveness.getTombBits(inst); - for (operands) |op| { - const dies = @as(u1, @truncate(tomb_bits)) != 0; - tomb_bits >>= 1; - if (!dies) continue; + const tomb_bits = func.liveness.getTombBits(inst); + for (0.., operands) |op_index, op| { + if (tomb_bits & @as(Liveness.Bpi, 1) << @intCast(op_index) == 0) continue; + if (func.reused_operands.isSet(op_index)) continue; try func.processDeath(op.toIndexAllowNone() orelse continue); } func.finishAirResult(inst, result); @@ -4424,7 +4428,7 @@ fn reuseOperandAdvanced( } // Prevent the operand deaths processing code from deallocating it. - func.liveness.clearOperandDeath(inst, op_index); + func.reused_operands.set(op_index); const op_inst = operand.toIndex().?; func.getResolvedInstValue(op_inst).reuse(func, maybe_tracked_inst, op_inst); diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 0957789325..240123ee51 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -78,6 +78,8 @@ end_di_column: u32, /// which is a relative jump, based on the address following the reloc. exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .empty, +reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined, + /// Whenever there is a runtime branch, we push a Branch onto this stack, /// and pop it off when the runtime branch joins. This provides an "overlay" /// of the table of mappings from instructions to `MCValue` from within the branch. 
@@ -493,6 +495,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { const old_air_bookkeeping = self.air_bookkeeping; try self.ensureProcessDeathCapacity(Liveness.bpi); + self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off .ptr_add => try self.airPtrArithmetic(inst, .ptr_add), @@ -3523,16 +3526,13 @@ fn finishAirBookkeeping(self: *Self) void { } fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref) void { - var tomb_bits = self.liveness.getTombBits(inst); - for (operands) |op| { - const dies = @as(u1, @truncate(tomb_bits)) != 0; - tomb_bits >>= 1; - if (!dies) continue; - const op_index = op.toIndex() orelse continue; - self.processDeath(op_index); + const tomb_bits = self.liveness.getTombBits(inst); + for (0.., operands) |op_index, op| { + if (tomb_bits & @as(Liveness.Bpi, 1) << @intCast(op_index) == 0) continue; + if (self.reused_operands.isSet(op_index)) continue; + self.processDeath(op.toIndexAllowNone() orelse continue); } - const is_used = @as(u1, @truncate(tomb_bits)) == 0; - if (is_used) { + if (tomb_bits & 1 << (Liveness.bpi - 1) == 0) { log.debug("%{d} => {}", .{ inst, result }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; branch.inst_table.putAssumeCapacityNoClobber(inst, result); @@ -4568,7 +4568,7 @@ fn reuseOperand(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, op_ind } // Prevent the operand deaths processing code from deallocating it. - self.liveness.clearOperandDeath(inst, op_index); + self.reused_operands.set(op_index); // That makes us responsible for doing the rest of the stuff that processDeath would have done. const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index dbb0179ebb..3bbc80999a 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1,41 +1,26 @@ const std = @import("std"); -const build_options = @import("build_options"); -const builtin = @import("builtin"); const assert = std.debug.assert; const codegen = @import("../../codegen.zig"); -const leb128 = std.leb; const link = @import("../../link.zig"); const log = std.log.scoped(.codegen); const tracking_log = std.log.scoped(.tracking); const verbose_tracking_log = std.log.scoped(.verbose_tracking); const wip_mir_log = std.log.scoped(.wip_mir); -const math = std.math; -const mem = std.mem; -const target_util = @import("../../target.zig"); -const trace = @import("../../tracy.zig").trace; const Air = @import("../../Air.zig"); -const Allocator = mem.Allocator; -const CodeGenError = codegen.CodeGenError; -const Compilation = @import("../../Compilation.zig"); -const ErrorMsg = Zcu.ErrorMsg; +const Allocator = std.mem.Allocator; const Emit = @import("Emit.zig"); const Liveness = @import("../../Liveness.zig"); const Lower = @import("Lower.zig"); const Mir = @import("Mir.zig"); -const Package = @import("../../Package.zig"); const Zcu = @import("../../Zcu.zig"); +const Module = @import("../../Package/Module.zig"); const InternPool = @import("../../InternPool.zig"); -const Alignment = InternPool.Alignment; -const Target = std.Target; const Type = @import("../../Type.zig"); const Value = @import("../../Value.zig"); -const Instruction = @import("encoder.zig").Instruction; const abi = @import("abi.zig"); const bits = @import("bits.zig"); -const errUnionErrorOffset = codegen.errUnionErrorOffset; -const errUnionPayloadOffset 
= codegen.errUnionPayloadOffset; const encoder = @import("encoder.zig"); const Condition = bits.Condition; @@ -46,7 +31,7 @@ const RegisterManager = abi.RegisterManager; const RegisterLock = RegisterManager.RegisterLock; const FrameIndex = bits.FrameIndex; -const InnerError = CodeGenError || error{OutOfRegisters}; +const InnerError = codegen.CodeGenError || error{OutOfRegisters}; gpa: Allocator, pt: Zcu.PerThread, @@ -57,7 +42,7 @@ debug_output: link.File.DebugInfoOutput, target: *const std.Target, owner: Owner, inline_func: InternPool.Index, -mod: *Package.Module, +mod: *Module, arg_index: u32, args: []MCValue, va_info: union { @@ -89,6 +74,7 @@ end_di_column: u32, /// which is a relative jump, based on the address following the reloc. exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty, +reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined, const_tracking: ConstTrackingMap = .{}, inst_tracking: InstTrackingMap = .{}, @@ -108,13 +94,11 @@ loops: std.AutoHashMapUnmanaged(Air.Inst.Index, struct { /// The state to restore before branching. state: State, /// The branch target. - jmp_target: Mir.Inst.Index, + target: Mir.Inst.Index, }) = .{}, -/// Debug field, used to find bugs in the compiler. -air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init, - -const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {}; +next_temp_index: Temp.Index = @enumFromInt(0), +temp_type: [Temp.Index.max]Type = undefined, const Owner = union(enum) { nav_index: InternPool.Nav.Index, @@ -433,7 +417,7 @@ pub const MCValue = union(enum) { .reserved_frame, .lea_symbol, => unreachable, - .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| .{ + .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ .size = size, @@ -484,8 +468,8 @@ pub const MCValue = union(enum) { .register_overflow => |pl| try writer.print("{s}:{s}", .{ @tagName(pl.eflags), @tagName(pl.reg), }), - .load_symbol => |pl| try writer.print("[{} + 0x{x}]", .{ pl.sym_index, pl.off }), - .lea_symbol => |pl| try writer.print("{} + 0x{x}", .{ pl.sym_index, pl.off }), + .load_symbol => |pl| try writer.print("[sym:{} + 0x{x}]", .{ pl.sym_index, pl.off }), + .lea_symbol => |pl| try writer.print("sym:{} + 0x{x}", .{ pl.sym_index, pl.off }), .indirect => |pl| try writer.print("[{s} + 0x{x}]", .{ @tagName(pl.reg), pl.off }), .load_direct => |pl| try writer.print("[direct:{d}]", .{pl}), .lea_direct => |pl| try writer.print("direct:{d}", .{pl}), @@ -562,7 +546,7 @@ const InstTracking = struct { .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } }, else => unreachable, } - tracking_log.debug("spill %{d} from {} to {}", .{ inst, self.short, self.long }); + tracking_log.debug("spill {} from {} to {}", .{ inst, self.short, self.long }); try function.genCopy(function.typeOfIndex(inst), self.long, self.short, .{}); } @@ -605,7 +589,7 @@ const InstTracking = struct { fn trackSpill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void { try function.freeValue(self.short); self.reuseFrame(); - tracking_log.debug("%{d} => {} (spilled)", .{ inst, self.* }); + tracking_log.debug("{} => {} (spilled)", .{ inst, self.* }); } fn verifyMaterialize(self: InstTracking, target: InstTracking) void { @@ -678,14 +662,14 @@ const InstTracking = struct { else => target.long, } else target.long; self.short = target.short; - tracking_log.debug("%{d} => {} (materialize)", .{ inst, self.* }); + 
tracking_log.debug("{} => {} (materialize)", .{ inst, self.* }); } fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void { switch (self.short) { .dead => |die_generation| if (die_generation >= scope_generation) { self.reuseFrame(); - tracking_log.debug("%{d} => {} (resurrect)", .{ inst, self.* }); + tracking_log.debug("{} => {} (resurrect)", .{ inst, self.* }); }, else => {}, } @@ -695,7 +679,7 @@ const InstTracking = struct { if (self.short == .dead) return; try function.freeValue(self.short); self.short = .{ .dead = function.scope_generation }; - tracking_log.debug("%{d} => {} (death)", .{ inst, self.* }); + tracking_log.debug("{} => {} (death)", .{ inst, self.* }); } fn reuse( @@ -705,16 +689,13 @@ const InstTracking = struct { old_inst: Air.Inst.Index, ) void { self.short = .{ .dead = function.scope_generation }; - if (new_inst) |inst| - tracking_log.debug("%{d} => {} (reuse %{d})", .{ inst, self.*, old_inst }) - else - tracking_log.debug("tmp => {} (reuse %{d})", .{ self.*, old_inst }); + tracking_log.debug("{?} => {} (reuse {})", .{ new_inst, self.*, old_inst }); } fn liveOut(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void { for (self.getRegs()) |reg| { if (function.register_manager.isRegFree(reg)) { - tracking_log.debug("%{d} => {} (live-out)", .{ inst, self.* }); + tracking_log.debug("{} => {} (live-out)", .{ inst, self.* }); continue; } @@ -741,7 +722,7 @@ const InstTracking = struct { // Perform side-effects of freeValue manually. function.register_manager.freeReg(reg); - tracking_log.debug("%{d} => {} (live-out %{d})", .{ inst, self.*, tracked_inst }); + tracking_log.debug("{} => {} (live-out {})", .{ inst, self.*, tracked_inst }); } } @@ -759,10 +740,10 @@ const InstTracking = struct { const FrameAlloc = struct { abi_size: u31, spill_pad: u3, - abi_align: Alignment, + abi_align: InternPool.Alignment, ref_count: u16, - fn init(alloc_abi: struct { size: u64, pad: u3 = 0, alignment: Alignment }) FrameAlloc { + fn init(alloc_abi: struct { size: u64, pad: u3 = 0, alignment: InternPool.Alignment }) FrameAlloc { return .{ .abi_size = @intCast(alloc_abi.size), .spill_pad = alloc_abi.pad, @@ -779,14 +760,14 @@ const FrameAlloc = struct { fn initSpill(ty: Type, zcu: *Zcu) FrameAlloc { const abi_size = ty.abiSize(zcu); const spill_size = if (abi_size < 8) - math.ceilPowerOfTwoAssert(u64, abi_size) + std.math.ceilPowerOfTwoAssert(u64, abi_size) else std.mem.alignForward(u64, abi_size, 8); return init(.{ .size = spill_size, .pad = @intCast(spill_size - abi_size), .alignment = ty.abiAlignment(zcu).maxStrict( - Alignment.fromNonzeroByteUnits(@min(spill_size, 8)), + InternPool.Alignment.fromNonzeroByteUnits(@min(spill_size, 8)), ), }); } @@ -819,7 +800,7 @@ pub fn generate( liveness: Liveness, code: *std.ArrayListUnmanaged(u8), debug_output: link.File.DebugInfoOutput, -) CodeGenError!void { +) codegen.CodeGenError!void { const zcu = pt.zcu; const comp = zcu.comp; const gpa = zcu.gpa; @@ -862,6 +843,11 @@ pub fn generate( function.mir_instructions.deinit(gpa); function.mir_extra.deinit(gpa); } + try function.inst_tracking.ensureTotalCapacity(gpa, Temp.Index.max); + for (0..Temp.Index.max) |temp_index| { + const temp: Temp.Index = @enumFromInt(temp_index); + function.inst_tracking.putAssumeCapacityNoClobber(temp.toIndex(), InstTracking.init(.none)); + } wip_mir_log.debug("{}:", .{fmtNav(func.owner_nav, ip)}); @@ -891,9 +877,9 @@ pub fn generate( })); function.frame_allocs.set(@intFromEnum(FrameIndex.base_ptr), FrameAlloc.init(.{ .size = 
Type.usize.abiSize(zcu), - .alignment = Alignment.min( + .alignment = InternPool.Alignment.min( call_info.stack_align, - Alignment.fromNonzeroByteUnits(function.target.stackAlignment()), + InternPool.Alignment.fromNonzeroByteUnits(function.target.stackAlignment()), ), })); function.frame_allocs.set( @@ -972,7 +958,7 @@ pub fn generateLazy( lazy_sym: link.File.LazySymbol, code: *std.ArrayListUnmanaged(u8), debug_output: link.File.DebugInfoOutput, -) CodeGenError!void { +) codegen.CodeGenError!void { const comp = bin_file.comp; const gpa = comp.gpa; // This function is for generating global code, so we use the root module. @@ -1169,14 +1155,14 @@ fn formatWipMir( lower.mir.extraData(Mir.Imm64, mir_inst.data.ai.i).data.decode(), }), .pseudo_dbg_local_as => { - const mem_op: Instruction.Operand = .{ .mem = .initSib(.qword, .{ + const mem_op: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{ .base = .{ .reloc = mir_inst.data.as.sym_index }, }) }; try writer.print(" {}, {}", .{ mir_inst.data.as.air_inst, mem_op.fmt(.m) }); }, .pseudo_dbg_local_aso => { const sym_off = lower.mir.extraData(bits.SymbolOffset, mir_inst.data.ax.payload).data; - const mem_op: Instruction.Operand = .{ .mem = .initSib(.qword, .{ + const mem_op: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{ .base = .{ .reloc = sym_off.sym_index }, .disp = sym_off.off, }) }; @@ -1184,7 +1170,7 @@ fn formatWipMir( }, .pseudo_dbg_local_aro => { const air_off = lower.mir.extraData(Mir.AirOffset, mir_inst.data.rx.payload).data; - const mem_op: Instruction.Operand = .{ .mem = .initSib(.qword, .{ + const mem_op: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{ .base = .{ .reg = mir_inst.data.rx.r1 }, .disp = air_off.off, }) }; @@ -1192,14 +1178,14 @@ fn formatWipMir( }, .pseudo_dbg_local_af => { const frame_addr = lower.mir.extraData(bits.FrameAddr, mir_inst.data.ax.payload).data; - const mem_op: Instruction.Operand = .{ .mem = .initSib(.qword, .{ + const mem_op: encoder.Instruction.Operand = .{ .mem = .initSib(.qword, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off, }) }; - try writer.print(" {}, {d}", .{ mir_inst.data.ax.air_inst, mem_op.fmt(.m) }); + try writer.print(" {}, {}", .{ mir_inst.data.ax.air_inst, mem_op.fmt(.m) }); }, .pseudo_dbg_local_am => { - const mem_op: Instruction.Operand = .{ + const mem_op: encoder.Instruction.Operand = .{ .mem = lower.mir.extraData(Mir.Memory, mir_inst.data.ax.payload).data.decode(), }; try writer.print(" {}, {}", .{ mir_inst.data.ax.air_inst, mem_op.fmt(.m) }); @@ -1221,7 +1207,7 @@ fn formatTracking( writer: anytype, ) @TypeOf(writer).Error!void { var it = data.self.inst_tracking.iterator(); - while (it.next()) |entry| try writer.print("\n%{d} = {}", .{ entry.key_ptr.*, entry.value_ptr.* }); + while (it.next()) |entry| try writer.print("\n{} = {}", .{ entry.key_ptr.*, entry.value_ptr.* }); } fn fmtTracking(self: *Self) std.fmt.Formatter(formatTracking) { return .{ .data = .{ .self = self } }; @@ -1427,7 +1413,7 @@ fn asmAirImmediate(self: *Self, tag: MirTagAir, inst: Air.Inst.Index, imm: Immed .i = @bitCast(s), } }, }), - .unsigned => |u| _ = if (math.cast(u32, u)) |small| try self.addInst(.{ + .unsigned => |u| _ = if (std.math.cast(u32, u)) |small| try self.addInst(.{ .tag = .pseudo, .ops = switch (tag) { .dbg_local => .pseudo_dbg_local_ai_u, @@ -1632,7 +1618,7 @@ fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2 fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void { const 
ops: Mir.Inst.Ops, const i: u32 = switch (imm) { .signed => |s| .{ .ri_s, @bitCast(s) }, - .unsigned => |u| if (math.cast(u32, u)) |small| + .unsigned => |u| if (std.math.cast(u32, u)) |small| .{ .ri_u, small } else .{ .ri_64, try self.addExtra(Mir.Imm64.encode(imm.unsigned)) }, @@ -1831,8 +1817,8 @@ fn asmRegisterMemoryImmediate( imm: Immediate, ) !void { if (switch (imm) { - .signed => |s| if (math.cast(i16, s)) |x| @as(u16, @bitCast(x)) else null, - .unsigned => |u| math.cast(u16, u), + .signed => |s| if (std.math.cast(i16, s)) |x| @as(u16, @bitCast(x)) else null, + .unsigned => |u| std.math.cast(u16, u), .reloc => unreachable, }) |small_imm| { _ = try self.addInst(.{ @@ -1967,8 +1953,8 @@ fn gen(self: *Self) InnerError!void { const cc = abi.resolveCallingConvention(fn_info.cc, self.target.*); if (cc != .naked) { try self.asmRegister(.{ ._, .push }, .rbp); - try self.asmPseudoImmediate(.pseudo_cfi_adjust_cfa_offset_i_s, Immediate.s(8)); - try self.asmPseudoRegisterImmediate(.pseudo_cfi_rel_offset_ri_s, .rbp, Immediate.s(0)); + try self.asmPseudoImmediate(.pseudo_cfi_adjust_cfa_offset_i_s, .s(8)); + try self.asmPseudoRegisterImmediate(.pseudo_cfi_rel_offset_ri_s, .rbp, .s(0)); try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp); try self.asmPseudoRegister(.pseudo_cfi_def_cfa_register_r, .rbp); const backpatch_push_callee_preserved_regs = try self.asmPlaceholder(); @@ -2016,7 +2002,7 @@ fn gen(self: *Self) InnerError!void { .{}, ); - try self.asmRegisterImmediate(.{ ._, .cmp }, .al, Immediate.u(info.fp_count)); + try self.asmRegisterImmediate(.{ ._, .cmp }, .al, .u(info.fp_count)); const skip_sse_reloc = try self.asmJccReloc(.na, undefined); const vec_2_f64 = try pt.vectorType(.{ .len = 2, .child = .f64_type }); @@ -2055,15 +2041,15 @@ fn gen(self: *Self) InnerError!void { const backpatch_stack_dealloc = try self.asmPlaceholder(); const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); try self.asmRegister(.{ ._, .pop }, .rbp); - try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, Immediate.s(8)); + try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8)); try self.asmOpOnly(.{ ._, .ret }); const frame_layout = try self.computeFrameLayout(cc); - const need_frame_align = frame_layout.stack_mask != math.maxInt(u32); + const need_frame_align = frame_layout.stack_mask != std.math.maxInt(u32); const need_stack_adjust = frame_layout.stack_adjust > 0; const need_save_reg = frame_layout.save_reg_list.count() > 0; if (need_frame_align) { - const page_align = @as(u32, math.maxInt(u32)) << 12; + const page_align = @as(u32, std.math.maxInt(u32)) << 12; self.mir_instructions.set(backpatch_frame_align, .{ .tag = .@"and", .ops = .ri_s, @@ -2170,23 +2156,18 @@ fn gen(self: *Self) InnerError!void { }); } -fn checkInvariantsAfterAirInst(self: *Self, inst: Air.Inst.Index, old_air_bookkeeping: @TypeOf(air_bookkeeping_init)) void { +fn checkInvariantsAfterAirInst(self: *Self) void { assert(!self.register_manager.lockedRegsExist()); if (std.debug.runtime_safety) { - if (self.air_bookkeeping < old_air_bookkeeping + 1) { - std.debug.panic("in codegen.zig, handling of AIR instruction %{d} ('{}') did not do proper bookkeeping. 
Look for a missing call to finishAir.", .{ inst, self.air.instructions.items(.tag)[@intFromEnum(inst)] }); - } - - { // check consistency of tracked registers - var it = self.register_manager.free_registers.iterator(.{ .kind = .unset }); - while (it.next()) |index| { - const tracked_inst = self.register_manager.registers[index]; - const tracking = self.getResolvedInstValue(tracked_inst); - for (tracking.getRegs()) |reg| { - if (RegisterManager.indexOfRegIntoTracked(reg).? == index) break; - } else unreachable; // tracked register not in use - } + // check consistency of tracked registers + var it = self.register_manager.free_registers.iterator(.{ .kind = .unset }); + while (it.next()) |index| { + const tracked_inst = self.register_manager.registers[index]; + const tracking = self.getResolvedInstValue(tracked_inst); + for (tracking.getRegs()) |reg| { + if (RegisterManager.indexOfRegIntoTracked(reg).? == index) break; + } else unreachable; // tracked register not in use } } } @@ -2202,6 +2183,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { const zcu = pt.zcu; const ip = &zcu.intern_pool; const air_tags = self.air.instructions.items(.tag); + const air_datas = self.air.instructions.items(.data); + const use_old = self.target.ofmt == .coff; self.arg_index = 0; for (body) |inst| switch (air_tags[@intFromEnum(inst)]) { @@ -2209,12 +2192,13 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { wip_mir_log.debug("{}", .{self.fmtAir(inst)}); verbose_tracking_log.debug("{}", .{self.fmtTracking()}); - const old_air_bookkeeping = self.air_bookkeeping; + self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); try self.airArg(inst); - self.checkInvariantsAfterAirInst(inst, old_air_bookkeeping); + self.resetTemps(); + self.checkInvariantsAfterAirInst(); }, else => break, }; @@ -2226,7 +2210,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { wip_mir_log.debug("{}", .{self.fmtAir(inst)}); verbose_tracking_log.debug("{}", .{self.fmtTracking()}); - const old_air_bookkeeping = self.air_bookkeeping; + self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off @@ -2260,7 +2244,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .sub_sat => try self.airSubSat(inst), .mul_sat => try self.airMulSat(inst), .shl_sat => try self.airShlSat(inst), - .slice => try self.airSlice(inst), .sin, .cos, @@ -2298,127 +2281,58 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .cmp_vector => try self.airCmpVector(inst), .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst), - .alloc => try self.airAlloc(inst), - .ret_ptr => try self.airRetPtr(inst), - .arg => try self.airDbgArg(inst), - .assembly => try self.airAsm(inst), - .bitcast => try self.airBitCast(inst), - .block => try self.airBlock(inst), - .br => try self.airBr(inst), - .repeat => try self.airRepeat(inst), - .switch_dispatch => try self.airSwitchDispatch(inst), - .trap => try self.airTrap(), - .breakpoint => try self.airBreakpoint(), - .ret_addr => try self.airRetAddr(inst), - .frame_addr => try self.airFrameAddress(inst), - .cond_br => try self.airCondBr(inst), - .fptrunc => try self.airFptrunc(inst), - .fpext => try self.airFpext(inst), - .intcast => try self.airIntCast(inst), - .trunc => try self.airTrunc(inst), - .int_from_bool => try 
self.airIntFromBool(inst), - .is_non_null => try self.airIsNonNull(inst), - .is_non_null_ptr => try self.airIsNonNullPtr(inst), - .is_null => try self.airIsNull(inst), - .is_null_ptr => try self.airIsNullPtr(inst), - .is_non_err => try self.airIsNonErr(inst), - .is_non_err_ptr => try self.airIsNonErrPtr(inst), - .is_err => try self.airIsErr(inst), - .is_err_ptr => try self.airIsErrPtr(inst), - .load => try self.airLoad(inst), - .loop => try self.airLoop(inst), - .int_from_ptr => try self.airIntFromPtr(inst), - .ret => try self.airRet(inst, false), - .ret_safe => try self.airRet(inst, true), - .ret_load => try self.airRetLoad(inst), - .store => try self.airStore(inst, false), - .store_safe => try self.airStore(inst, true), - .struct_field_ptr=> try self.airStructFieldPtr(inst), - .struct_field_val=> try self.airStructFieldVal(inst), - .array_to_slice => try self.airArrayToSlice(inst), - .float_from_int => try self.airFloatFromInt(inst), - .int_from_float => try self.airIntFromFloat(inst), - .cmpxchg_strong => try self.airCmpxchg(inst), - .cmpxchg_weak => try self.airCmpxchg(inst), - .atomic_rmw => try self.airAtomicRmw(inst), - .atomic_load => try self.airAtomicLoad(inst), - .memcpy => try self.airMemcpy(inst), - .memset => try self.airMemset(inst, false), - .memset_safe => try self.airMemset(inst, true), - .set_union_tag => try self.airSetUnionTag(inst), - .get_union_tag => try self.airGetUnionTag(inst), - .clz => try self.airClz(inst), - .ctz => try self.airCtz(inst), - .popcount => try self.airPopCount(inst), - .byte_swap => try self.airByteSwap(inst), - .bit_reverse => try self.airBitReverse(inst), - .tag_name => try self.airTagName(inst), - .error_name => try self.airErrorName(inst), - .splat => try self.airSplat(inst), - .select => try self.airSelect(inst), - .shuffle => try self.airShuffle(inst), - .reduce => try self.airReduce(inst), - .aggregate_init => try self.airAggregateInit(inst), - .union_init => try self.airUnionInit(inst), - .prefetch => try self.airPrefetch(inst), - .mul_add => try self.airMulAdd(inst), - .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), - - .@"try" => try self.airTry(inst), - .try_cold => try self.airTry(inst), // TODO - .try_ptr => try self.airTryPtr(inst), - .try_ptr_cold => try self.airTryPtr(inst), // TODO - - .dbg_stmt => try self.airDbgStmt(inst), - .dbg_empty_stmt => try self.airDbgEmptyStmt(), - .dbg_inline_block => try self.airDbgInlineBlock(inst), - .dbg_var_ptr, - .dbg_var_val, - .dbg_arg_inline, - => try self.airDbgVar(inst), - - .call => try self.airCall(inst, .auto), - .call_always_tail => try self.airCall(inst, .always_tail), - .call_never_tail => try self.airCall(inst, .never_tail), - .call_never_inline => try self.airCall(inst, .never_inline), + .bitcast => try self.airBitCast(inst), + .fptrunc => try self.airFptrunc(inst), + .fpext => try self.airFpext(inst), + .intcast => try self.airIntCast(inst), + .trunc => try self.airTrunc(inst), + .is_non_null => try self.airIsNonNull(inst), + .is_null => try self.airIsNull(inst), + .is_non_err => try self.airIsNonErr(inst), + .is_err => try self.airIsErr(inst), + .load => try self.airLoad(inst), + .store => try self.airStore(inst, false), + .store_safe => try self.airStore(inst, true), + .struct_field_val => try self.airStructFieldVal(inst), + .float_from_int => try self.airFloatFromInt(inst), + .int_from_float => try self.airIntFromFloat(inst), + .cmpxchg_strong => try self.airCmpxchg(inst), + .cmpxchg_weak => try self.airCmpxchg(inst), + .atomic_rmw => try 
self.airAtomicRmw(inst), + .atomic_load => try self.airAtomicLoad(inst), + .memcpy => try self.airMemcpy(inst), + .memset => try self.airMemset(inst, false), + .memset_safe => try self.airMemset(inst, true), + .set_union_tag => try self.airSetUnionTag(inst), + .get_union_tag => try self.airGetUnionTag(inst), + .clz => try self.airClz(inst), + .ctz => try self.airCtz(inst), + .popcount => try self.airPopCount(inst), + .byte_swap => try self.airByteSwap(inst), + .bit_reverse => try self.airBitReverse(inst), + .tag_name => try self.airTagName(inst), + .error_name => try self.airErrorName(inst), + .splat => try self.airSplat(inst), + .select => try self.airSelect(inst), + .shuffle => try self.airShuffle(inst), + .reduce => try self.airReduce(inst), + .aggregate_init => try self.airAggregateInit(inst), + .union_init => try self.airUnionInit(inst), + .prefetch => try self.airPrefetch(inst), + .mul_add => try self.airMulAdd(inst), .atomic_store_unordered => try self.airAtomicStore(inst, .unordered), .atomic_store_monotonic => try self.airAtomicStore(inst, .monotonic), .atomic_store_release => try self.airAtomicStore(inst, .release), .atomic_store_seq_cst => try self.airAtomicStore(inst, .seq_cst), - .struct_field_ptr_index_0 => try self.airStructFieldPtrIndex(inst, 0), - .struct_field_ptr_index_1 => try self.airStructFieldPtrIndex(inst, 1), - .struct_field_ptr_index_2 => try self.airStructFieldPtrIndex(inst, 2), - .struct_field_ptr_index_3 => try self.airStructFieldPtrIndex(inst, 3), - - .field_parent_ptr => try self.airFieldParentPtr(inst), - - .switch_br => try self.airSwitchBr(inst), - .loop_switch_br => try self.airLoopSwitchBr(inst), - .slice_ptr => try self.airSlicePtr(inst), - .slice_len => try self.airSliceLen(inst), - - .ptr_slice_len_ptr => try self.airPtrSliceLenPtr(inst), - .ptr_slice_ptr_ptr => try self.airPtrSlicePtrPtr(inst), - .array_elem_val => try self.airArrayElemVal(inst), .slice_elem_val => try self.airSliceElemVal(inst), - .slice_elem_ptr => try self.airSliceElemPtr(inst), .ptr_elem_val => try self.airPtrElemVal(inst), - .ptr_elem_ptr => try self.airPtrElemPtr(inst), - - .inferred_alloc, .inferred_alloc_comptime => unreachable, - .unreach => self.finishAirBookkeeping(), .optional_payload => try self.airOptionalPayload(inst), - .optional_payload_ptr => try self.airOptionalPayloadPtr(inst), - .optional_payload_ptr_set => try self.airOptionalPayloadPtrSet(inst), .unwrap_errunion_err => try self.airUnwrapErrUnionErr(inst), .unwrap_errunion_payload => try self.airUnwrapErrUnionPayload(inst), - .unwrap_errunion_err_ptr => try self.airUnwrapErrUnionErrPtr(inst), - .unwrap_errunion_payload_ptr=> try self.airUnwrapErrUnionPayloadPtr(inst), - .errunion_payload_ptr_set => try self.airErrUnionPayloadPtrSet(inst), .err_return_trace => try self.airErrReturnTrace(inst), .set_err_return_trace => try self.airSetErrReturnTrace(inst), .save_err_return_trace_index=> try self.airSaveErrReturnTraceIndex(inst), @@ -2426,7 +2340,12 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wrap_optional => try self.airWrapOptional(inst), .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst), + // zig fmt: on + .add_safe, + .sub_safe, + .mul_safe, + => return self.fail("TODO implement safety_checked_instructions", .{}), .add_optimized, .sub_optimized, .mul_optimized, @@ -2448,13 +2367,429 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .int_from_float_optimized, => return self.fail("TODO 
implement optimized float mode", .{}), - .add_safe, - .sub_safe, - .mul_safe, - => return self.fail("TODO implement safety_checked_instructions", .{}), + .arg => try self.airDbgArg(inst), + .alloc => if (use_old) try self.airAlloc(inst) else { + var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{ + .index = try self.allocMemPtr(inst), + } }); + try slot.moveTo(inst, self); + }, + .inferred_alloc => unreachable, + .inferred_alloc_comptime => unreachable, + .ret_ptr => if (use_old) try self.airRetPtr(inst) else { + var slot = switch (self.ret_mcv.long) { + else => unreachable, + .none => try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{ + .index = try self.allocMemPtr(inst), + } }), + .load_frame => slot: { + var slot = try self.tempFromValue(self.typeOfIndex(inst), self.ret_mcv.long); + try slot.toOffset(self.ret_mcv.short.indirect.off, self); + break :slot slot; + }, + }; + try slot.moveTo(inst, self); + }, + .assembly => try self.airAsm(inst), + .block => if (use_old) try self.airBlock(inst) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const extra = self.air.extraData(Air.Block, ty_pl.payload); + try self.asmPseudo(.pseudo_dbg_enter_block_none); + try self.lowerBlock(inst, @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len])); + try self.asmPseudo(.pseudo_dbg_leave_block_none); + }, + .loop => if (use_old) try self.airLoop(inst) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const extra = self.air.extraData(Air.Block, ty_pl.payload); + self.scope_generation += 1; + try self.loops.putNoClobber(self.gpa, inst, .{ + .state = try self.saveState(), + .target = @intCast(self.mir_instructions.len), + }); + defer assert(self.loops.remove(inst)); + try self.genBodyBlock(@ptrCast(self.air.extra[extra.end..][0..extra.data.body_len])); + }, + .repeat => if (use_old) try self.airRepeat(inst) else { + const repeat = air_datas[@intFromEnum(inst)].repeat; + const loop = self.loops.get(repeat.loop_inst).?; + try self.restoreState(loop.state, &.{}, .{ + .emit_instructions = true, + .update_tracking = false, + .resurrect = false, + .close_scope = true, + }); + _ = try self.asmJmpReloc(loop.target); + }, + .br => try self.airBr(inst), + .trap => try self.asmOpOnly(.{ ._, .ud2 }), + .breakpoint => try self.asmOpOnly(.{ ._, .int3 }), + .ret_addr => if (use_old) try self.airRetAddr(inst) else { + var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .load_frame = .{ + .index = .ret_addr, + } }); + while (try slot.toAnyReg(self)) {} + try slot.moveTo(inst, self); + }, + .frame_addr => if (use_old) try self.airFrameAddress(inst) else { + var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{ + .index = .base_ptr, + } }); + try slot.moveTo(inst, self); + }, + .call => try self.airCall(inst, .auto), + .call_always_tail => try self.airCall(inst, .always_tail), + .call_never_tail => try self.airCall(inst, .never_tail), + .call_never_inline => try self.airCall(inst, .never_inline), + + .cond_br => try self.airCondBr(inst), + .switch_br => try self.airSwitchBr(inst), + .loop_switch_br => try self.airLoopSwitchBr(inst), + .switch_dispatch => try self.airSwitchDispatch(inst), + .@"try", .try_cold => try self.airTry(inst), + .try_ptr, .try_ptr_cold => try self.airTryPtr(inst), + .dbg_stmt => if (use_old) try self.airDbgStmt(inst) else { + const dbg_stmt = air_datas[@intFromEnum(inst)].dbg_stmt; + _ = try self.addInst(.{ + .tag = .pseudo, + .ops = .pseudo_dbg_line_line_column, + .data = .{ .line_column = .{ + 
.line = dbg_stmt.line, + .column = dbg_stmt.column, + } }, + }); + }, + .dbg_empty_stmt => if (use_old) try self.airDbgEmptyStmt() else { + if (self.mir_instructions.len > 0) { + const prev_mir_op = &self.mir_instructions.items(.ops)[self.mir_instructions.len - 1]; + if (prev_mir_op.* == .pseudo_dbg_line_stmt_line_column) + prev_mir_op.* = .pseudo_dbg_line_line_column; + } + try self.asmOpOnly(.{ ._, .nop }); + }, + .dbg_inline_block => if (use_old) try self.airDbgInlineBlock(inst) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload); + const old_inline_func = self.inline_func; + defer self.inline_func = old_inline_func; + self.inline_func = extra.data.func; + _ = try self.addInst(.{ + .tag = .pseudo, + .ops = .pseudo_dbg_enter_inline_func, + .data = .{ .func = extra.data.func }, + }); + try self.lowerBlock(inst, @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len])); + _ = try self.addInst(.{ + .tag = .pseudo, + .ops = .pseudo_dbg_leave_inline_func, + .data = .{ .func = old_inline_func }, + }); + }, + .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => if (use_old) try self.airDbgVar(inst) else { + const pl_op = air_datas[@intFromEnum(inst)].pl_op; + var ops = try self.tempsFromOperands(inst, .{pl_op.operand}); + try self.genLocalDebugInfo(inst, ops[0].tracking(self).short); + try ops[0].die(self); + }, + .is_null_ptr => if (use_old) try self.airIsNullPtr(inst) else { + const un_op = air_datas[@intFromEnum(inst)].un_op; + const opt_ty = self.typeOf(un_op).childType(zcu); + const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu); + const opt_child_ty = opt_ty.optionalChild(zcu); + const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu)); + var ops = try self.tempsFromOperands(inst, .{un_op}); + if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, self); + while (try ops[0].toLea(self)) {} + try self.asmMemoryImmediate( + .{ ._, .cmp }, + try ops[0].tracking(self).short.deref().mem(self, if (!opt_repr_is_pl) + .byte + else if (opt_child_ty.isSlice(zcu)) + .qword + else + Memory.Size.fromSize(opt_child_abi_size)), + .u(0), + ); + var is_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e }); + try ops[0].die(self); + try is_null.moveTo(inst, self); + }, + .is_non_null_ptr => if (use_old) try self.airIsNonNullPtr(inst) else { + const un_op = air_datas[@intFromEnum(inst)].un_op; + const opt_ty = self.typeOf(un_op).childType(zcu); + const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu); + const opt_child_ty = opt_ty.optionalChild(zcu); + const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu)); + var ops = try self.tempsFromOperands(inst, .{un_op}); + if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, self); + while (try ops[0].toLea(self)) {} + try self.asmMemoryImmediate( + .{ ._, .cmp }, + try ops[0].tracking(self).short.deref().mem(self, if (!opt_repr_is_pl) + .byte + else if (opt_child_ty.isSlice(zcu)) + .qword + else + Memory.Size.fromSize(opt_child_abi_size)), + .u(0), + ); + var is_non_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne }); + try ops[0].die(self); + try is_non_null.moveTo(inst, self); + }, + .is_err_ptr => if (use_old) try self.airIsErrPtr(inst) else { + const un_op = air_datas[@intFromEnum(inst)].un_op; + const eu_ty = self.typeOf(un_op).childType(zcu); + const eu_err_ty = eu_ty.errorUnionSet(zcu); + const eu_pl_ty = eu_ty.errorUnionPayload(zcu); + const eu_err_off: i32 = 
@intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); + var ops = try self.tempsFromOperands(inst, .{un_op}); + try ops[0].toOffset(eu_err_off, self); + while (try ops[0].toLea(self)) {} + try self.asmMemoryImmediate( + .{ ._, .cmp }, + try ops[0].tracking(self).short.deref().mem(self, self.memSize(eu_err_ty)), + .u(0), + ); + var is_err = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne }); + try ops[0].die(self); + try is_err.moveTo(inst, self); + }, + .is_non_err_ptr => if (use_old) try self.airIsNonErrPtr(inst) else { + const un_op = air_datas[@intFromEnum(inst)].un_op; + const eu_ty = self.typeOf(un_op).childType(zcu); + const eu_err_ty = eu_ty.errorUnionSet(zcu); + const eu_pl_ty = eu_ty.errorUnionPayload(zcu); + const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); + var ops = try self.tempsFromOperands(inst, .{un_op}); + try ops[0].toOffset(eu_err_off, self); + while (try ops[0].toLea(self)) {} + try self.asmMemoryImmediate( + .{ ._, .cmp }, + try ops[0].tracking(self).short.deref().mem(self, self.memSize(eu_err_ty)), + .u(0), + ); + var is_non_err = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e }); + try ops[0].die(self); + try is_non_err.moveTo(inst, self); + }, + .int_from_ptr => if (use_old) try self.airIntFromPtr(inst) else { + const un_op = air_datas[@intFromEnum(inst)].un_op; + var ops = try self.tempsFromOperands(inst, .{un_op}); + try ops[0].toLimb(0, self); + try ops[0].moveTo(inst, self); + }, + .int_from_bool => if (use_old) try self.airIntFromBool(inst) else { + const un_op = air_datas[@intFromEnum(inst)].un_op; + var ops = try self.tempsFromOperands(inst, .{un_op}); + try ops[0].moveTo(inst, self); + }, + .ret => try self.airRet(inst, false), + .ret_safe => try self.airRet(inst, true), + .ret_load => try self.airRetLoad(inst), + .unreach => {}, + .optional_payload_ptr => if (use_old) try self.airOptionalPayloadPtr(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].moveTo(inst, self); + }, + .optional_payload_ptr_set => if (use_old) try self.airOptionalPayloadPtrSet(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + const opt_ty = self.typeOf(ty_op.operand).childType(zcu); + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + if (!opt_ty.optionalReprIsPayload(zcu)) { + const opt_child_ty = opt_ty.optionalChild(zcu); + const opt_child_abi_size: i32 = @intCast(opt_child_ty.abiSize(zcu)); + try ops[0].toOffset(opt_child_abi_size, self); + var has_value = try self.tempFromValue(Type.bool, .{ .immediate = 1 }); + try ops[0].store(&has_value, self); + try has_value.die(self); + try ops[0].toOffset(-opt_child_abi_size, self); + } + try ops[0].moveTo(inst, self); + }, + .unwrap_errunion_payload_ptr => if (use_old) try self.airUnwrapErrUnionPayloadPtr(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + const eu_ty = self.typeOf(ty_op.operand).childType(zcu); + const eu_pl_ty = eu_ty.errorUnionPayload(zcu); + const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu)); + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(eu_pl_off, self); + try ops[0].moveTo(inst, self); + }, + .unwrap_errunion_err_ptr => if (use_old) try self.airUnwrapErrUnionErrPtr(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + const eu_ty = self.typeOf(ty_op.operand).childType(zcu); + const eu_pl_ty = eu_ty.errorUnionPayload(zcu); + const 
eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(eu_err_off, self); + var err = try ops[0].load(eu_ty.errorUnionSet(zcu), self); + try ops[0].die(self); + try err.moveTo(inst, self); + }, + .errunion_payload_ptr_set => if (use_old) try self.airErrUnionPayloadPtrSet(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + const eu_ty = self.typeOf(ty_op.operand).childType(zcu); + const eu_err_ty = eu_ty.errorUnionSet(zcu); + const eu_pl_ty = eu_ty.errorUnionPayload(zcu); + const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); + const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu)); + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(eu_err_off, self); + var no_err = try self.tempFromValue(eu_err_ty, .{ .immediate = 0 }); + try ops[0].store(&no_err, self); + try no_err.die(self); + try ops[0].toOffset(eu_pl_off - eu_err_off, self); + try ops[0].moveTo(inst, self); + }, + .struct_field_ptr => if (use_old) try self.airStructFieldPtr(inst) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const extra = self.air.extraData(Air.StructField, ty_pl.payload).data; + var ops = try self.tempsFromOperands(inst, .{extra.struct_operand}); + try ops[0].toOffset(self.fieldOffset(self.typeOf(extra.struct_operand), self.typeOfIndex(inst), extra.field_index), self); + try ops[0].moveTo(inst, self); + }, + .struct_field_ptr_index_0 => if (use_old) try self.airStructFieldPtrIndex(inst, 0) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(self.fieldOffset(self.typeOf(ty_op.operand), self.typeOfIndex(inst), 0), self); + try ops[0].moveTo(inst, self); + }, + .struct_field_ptr_index_1 => if (use_old) try self.airStructFieldPtrIndex(inst, 1) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(self.fieldOffset(self.typeOf(ty_op.operand), self.typeOfIndex(inst), 1), self); + try ops[0].moveTo(inst, self); + }, + .struct_field_ptr_index_2 => if (use_old) try self.airStructFieldPtrIndex(inst, 2) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(self.fieldOffset(self.typeOf(ty_op.operand), self.typeOfIndex(inst), 2), self); + try ops[0].moveTo(inst, self); + }, + .struct_field_ptr_index_3 => if (use_old) try self.airStructFieldPtrIndex(inst, 3) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(self.fieldOffset(self.typeOf(ty_op.operand), self.typeOfIndex(inst), 3), self); + try ops[0].moveTo(inst, self); + }, + .slice => if (use_old) try self.airSlice(inst) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; + var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + try ops[0].toPair(&ops[1], self); + try ops[0].moveTo(inst, self); + }, + .slice_len => if (use_old) try self.airSliceLen(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toLimb(1, self); + try ops[0].moveTo(inst, self); + }, + .slice_ptr => if (use_old) try self.airSlicePtr(inst) else { + 
const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toLimb(0, self); + try ops[0].moveTo(inst, self); + }, + .ptr_slice_len_ptr => if (use_old) try self.airPtrSliceLenPtr(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(8, self); + try ops[0].moveTo(inst, self); + }, + .ptr_slice_ptr_ptr => if (use_old) try self.airPtrSlicePtrPtr(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(0, self); + try ops[0].moveTo(inst, self); + }, + .slice_elem_ptr, .ptr_elem_ptr => |tag| if (use_old) switch (tag) { + else => unreachable, + .slice_elem_ptr => try self.airSliceElemPtr(inst), + .ptr_elem_ptr => try self.airPtrElemPtr(inst), + } else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; + var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + switch (tag) { + else => unreachable, + .slice_elem_ptr => try ops[0].toLimb(0, self), + .ptr_elem_ptr => {}, + } + const dst_ty = self.typeOfIndex(inst); + if (dst_ty.ptrInfo(zcu).flags.vector_index == .none) zero_offset: { + const elem_size = dst_ty.childType(zcu).abiSize(zcu); + if (elem_size == 0) break :zero_offset; + while (true) for (&ops) |*op| { + if (try op.toAnyReg(self)) break; + } else break; + const lhs_reg = ops[0].unwrap(self).temp.tracking(self).short.register.to64(); + const rhs_reg = ops[1].unwrap(self).temp.tracking(self).short.register.to64(); + if (!std.math.isPowerOfTwo(elem_size)) { + try self.spillEflagsIfOccupied(); + try self.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + rhs_reg, + rhs_reg, + .u(elem_size), + ); + try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + }); + } else if (elem_size > 8) { + try self.spillEflagsIfOccupied(); + try self.asmRegisterImmediate( + .{ ._l, .sh }, + rhs_reg, + .u(std.math.log2_int(u64, elem_size)), + ); + try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + }); + } else try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ + .size = .qword, + .index = rhs_reg, + .scale = .fromFactor(@intCast(elem_size)), + } }, + }); + } + try ops[1].die(self); + try ops[0].moveTo(inst, self); + }, + .array_to_slice => if (use_old) try self.airArrayToSlice(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + var len = try self.tempFromValue(Type.usize, .{ + .immediate = self.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu), + }); + try ops[0].toPair(&len, self); + try ops[0].moveTo(inst, self); + }, + .error_set_has_value => return self.fail("TODO implement error_set_has_value", .{}), + .field_parent_ptr => if (use_old) try self.airFieldParentPtr(inst) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; + var ops = try self.tempsFromOperands(inst, .{extra.field_ptr}); + try ops[0].toOffset(-self.fieldOffset(self.typeOfIndex(inst), self.typeOf(extra.field_ptr), extra.field_index), self); + try ops[0].moveTo(inst, self); + }, .is_named_enum_value => 
return self.fail("TODO implement is_named_enum_value", .{}), - .error_set_has_value => return self.fail("TODO implement error_set_has_value", .{}), + + .wasm_memory_size => unreachable, + .wasm_memory_grow => unreachable, + + .addrspace_cast => { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].moveTo(inst, self); + }, + .vector_store_elem => return self.fail("TODO implement vector_store_elem", .{}), .c_va_arg => try self.airVaArg(inst), @@ -2462,15 +2797,12 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .c_va_end => try self.airVaEnd(inst), .c_va_start => try self.airVaStart(inst), - .wasm_memory_size => unreachable, - .wasm_memory_grow => unreachable, - .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, - // zig fmt: on } - self.checkInvariantsAfterAirInst(inst, old_air_bookkeeping); + self.resetTemps(); + self.checkInvariantsAfterAirInst(); } verbose_tracking_log.debug("{}", .{self.fmtTracking()}); } @@ -2530,7 +2862,7 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { data_off += @intCast(tag_name_len + 1); } - try self.airTrap(); + try self.asmOpOnly(.{ ._, .ud2 }); for (exitlude_jump_relocs) |reloc| self.performReloc(reloc); try self.asmOpOnly(.{ ._, .ret }); @@ -2544,6 +2876,10 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) !void { for (value.getRegs()) |reg| try self.register_manager.getReg(reg, inst); + switch (value) { + else => {}, + .eflags, .register_overflow => self.eflags_inst = inst, + } } fn getValueIfFree(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void { @@ -2577,26 +2913,18 @@ fn processDeath(self: *Self, inst: Air.Inst.Index) !void { try self.inst_tracking.getPtr(inst).?.die(self, inst); } -/// Called when there are no operands, and the instruction is always unreferenced. -fn finishAirBookkeeping(self: *Self) void { - if (std.debug.runtime_safety) { - self.air_bookkeeping += 1; - } -} - fn finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void { if (self.liveness.isUnused(inst) and self.air.instructions.items(.tag)[@intFromEnum(inst)] != .arg) switch (result) { .none, .dead, .unreach => {}, else => unreachable, // Why didn't the result die? } else { - tracking_log.debug("%{d} => {} (birth)", .{ inst, result }); + tracking_log.debug("{} => {} (birth)", .{ inst, result }); self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result)); // In some cases, an operand may be reused as the result. // If that operand died and was a register, it was freed by // processDeath, so we have to "re-allocate" the register. 
self.getValueIfFree(result, inst); } - self.finishAirBookkeeping(); } fn finishAir( @@ -2605,11 +2933,10 @@ fn finishAir( result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref, ) !void { - var tomb_bits = self.liveness.getTombBits(inst); - for (operands) |op| { - const dies = @as(u1, @truncate(tomb_bits)) != 0; - tomb_bits >>= 1; - if (!dies) continue; + const tomb_bits = self.liveness.getTombBits(inst); + for (0.., operands) |op_index, op| { + if (tomb_bits & @as(Liveness.Bpi, 1) << @intCast(op_index) == 0) continue; + if (self.reused_operands.isSet(op_index)) continue; try self.processDeath(op.toIndexAllowNone() orelse continue); } self.finishAirResult(inst, result); @@ -2657,7 +2984,7 @@ fn computeFrameLayout(self: *Self, cc: std.builtin.CallingConvention) !FrameLayo } }; const sort_context = SortContext{ .frame_align = frame_align }; - mem.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan); + std.mem.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan); } const call_frame_align = frame_align[@intFromEnum(FrameIndex.call_frame)]; @@ -2697,13 +3024,13 @@ fn computeFrameLayout(self: *Self, cc: std.builtin.CallingConvention) !FrameLayo @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.stack_frame)]); return .{ - .stack_mask = @as(u32, math.maxInt(u32)) << @intCast(if (need_align_stack) @intFromEnum(needed_align) else 0), + .stack_mask = @as(u32, std.math.maxInt(u32)) << @intCast(if (need_align_stack) @intFromEnum(needed_align) else 0), .stack_adjust = @intCast(rsp_offset - frame_offset[@intFromEnum(FrameIndex.call_frame)]), .save_reg_list = save_reg_list, }; } -fn getFrameAddrAlignment(self: *Self, frame_addr: bits.FrameAddr) Alignment { +fn getFrameAddrAlignment(self: *Self, frame_addr: bits.FrameAddr) InternPool.Alignment { const alloc_align = self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_align; return @enumFromInt(@min(@intFromEnum(alloc_align), @ctz(frame_addr.off))); } @@ -2741,7 +3068,7 @@ fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex { const ptr_ty = self.typeOfIndex(inst); const val_ty = ptr_ty.childType(zcu); return self.allocFrameIndex(FrameAlloc.init(.{ - .size = math.cast(u32, val_ty.abiSize(zcu)) orelse { + .size = std.math.cast(u32, val_ty.abiSize(zcu)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(pt)}); }, .alignment = ptr_ty.ptrAlignment(zcu).max(.@"1"), @@ -2759,7 +3086,7 @@ fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue { fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { const pt = self.pt; const zcu = pt.zcu; - const abi_size = math.cast(u32, ty.abiSize(zcu)) orelse { + const abi_size = std.math.cast(u32, ty.abiSize(zcu)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(pt)}); }; @@ -2857,8 +3184,8 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt } if (opts.resurrect) for ( - self.inst_tracking.keys()[0..state.inst_tracking_len], - self.inst_tracking.values()[0..state.inst_tracking_len], + self.inst_tracking.keys()[Temp.Index.max..state.inst_tracking_len], + self.inst_tracking.values()[Temp.Index.max..state.inst_tracking_len], ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation); for (deaths) |death| try self.processDeath(death); @@ -3067,7 +3394,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { .{ .v_, .cvtps2ph }, dst_reg, mat_src_reg.to128(), - Immediate.u(@as(u5, @bitCast(RoundMode{ 
.mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); }, else => unreachable, @@ -3267,7 +3594,7 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { const dst_elem_abi_size = dst_ty.childType(zcu).abiSize(zcu); const src_elem_abi_size = src_ty.childType(zcu).abiSize(zcu); - switch (math.order(dst_elem_abi_size, src_elem_abi_size)) { + switch (std.math.order(dst_elem_abi_size, src_elem_abi_size)) { .lt => { const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) { else => break :result null, @@ -3431,8 +3758,8 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { }; const dst_mcv = if (dst_int_info.bits <= src_storage_bits and - math.divCeil(u16, dst_int_info.bits, 64) catch unreachable == - math.divCeil(u32, src_storage_bits, 64) catch unreachable and + std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable == + std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { const dst_mcv = try self.allocRegOrMem(inst, true); try self.genCopy(min_ty, dst_mcv, src_mcv, .{}); @@ -3449,8 +3776,8 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }; } - const src_limbs_len = math.divCeil(u16, src_int_info.bits, 64) catch unreachable; - const dst_limbs_len = math.divCeil(u16, dst_int_info.bits, 64) catch unreachable; + const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable; + const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable; const high_mcv: MCValue = if (dst_mcv.isMemory()) dst_mcv.address().offset((src_limbs_len - 1) * 8).deref() @@ -3570,7 +3897,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { const dst_info = dst_elem_ty.intInfo(zcu); const src_info = src_elem_ty.intInfo(zcu); - const mask_val = try pt.intValue(src_elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits)); + const mask_val = try pt.intValue(src_elem_ty, @as(u64, std.math.maxInt(u64)) >> @intCast(64 - dst_info.bits)); const splat_ty = try pt.vectorType(.{ .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)), @@ -3607,7 +3934,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract }, registerAlias(temp_reg, dst_abi_size), dst_alias, - Immediate.u(1), + .u(1), ); try self.asmRegisterRegisterRegister( mir_tag, @@ -3806,7 +4133,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { try self.asmMemoryImmediate( .{ ._, .mov }, .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } }, - Immediate.u(0), + .u(0), ); const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); @@ -3920,11 +4247,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { .mod = .{ .rm = .{ .size = .qword } }, }, ); - try self.asmRegisterImmediate( - .{ ._, .sbb }, - dst_mcv.register_pair[1], - Immediate.u(0), - ); + try self.asmRegisterImmediate(.{ ._, .sbb }, dst_mcv.register_pair[1], .u(0)); try self.freeValue( .{ .load_frame = .{ .index = signed_div_floor_state.frame_index } }, ); @@ -4068,7 +4391,7 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { break :cc .o; } else cc: { try self.genSetReg(limit_reg, ty, .{ - .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - ty.bitSize(zcu)), + .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - ty.bitSize(zcu)), }, .{}); try 
self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv); @@ -4266,12 +4589,12 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { mat_rhs_mcv.register_pair[1], ); - try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, Immediate.u(63)); + try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63)); try self.asmRegister(.{ ._, .not }, tmp_reg); - try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .dword), Immediate.s(0)); + try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .dword), .s(0)); try self.freeValue(overflow); try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[0], tmp_reg); - try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, Immediate.u(63)); + try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, .u(63)); try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[1], tmp_reg); break :result dst_mcv; } @@ -4321,7 +4644,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { break :cc .o; } else cc: { try self.genSetReg(limit_reg, ty, .{ - .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - reg_bits), + .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - reg_bits), }, .{}); break :cc .c; }; @@ -4366,7 +4689,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }; const tuple_ty = self.typeOfIndex(inst); - if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) { + if (int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits)) { switch (partial_mcv) { .register => |reg| { self.eflags_inst = inst; @@ -4444,7 +4767,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const cc = Condition.ne; const tuple_ty = self.typeOfIndex(inst); - if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) { + if (int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits)) { switch (partial_mcv) { .register => |reg| { self.eflags_inst = inst; @@ -4576,7 +4899,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (dst_info.bits > 128 and dst_info.signedness == .unsigned) { const slow_inc = self.hasFeature(.slow_incdec); const abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); - const limb_len = math.divCeil(u32, abi_size, 8) catch unreachable; + const limb_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; try self.spillRegisters(&.{ .rax, .rcx, .rdx }); const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx }); @@ -4618,7 +4941,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx); const inner_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, .u(1)); try self.asmMemoryRegister(.{ ._, .adc }, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .mod = .{ .rm = .{ @@ -4642,7 +4965,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }); try self.asmRegister(.{ ._, .mul }, temp_regs[1].to64()); - try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, .u(1)); try self.asmMemoryRegister(.{ ._, .adc }, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .mod = .{ .rm = .{ @@ -4656,30 +4979,22 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { try self.asmSetccRegister(.c, .ch); if (slow_inc) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), Immediate.u(1)); - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), 
Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32()); try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32()); } - try self.asmRegisterImmediate( - .{ ._, .cmp }, - temp_regs[3].to32(), - Immediate.u(limb_len), - ); + try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, inner_loop); try self.asmRegisterRegister(.{ ._, .@"or" }, .rdx, .rcx); const overflow = try self.asmJccReloc(.nz, undefined); const overflow_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterImmediate( - .{ ._, .cmp }, - temp_regs[2].to32(), - Immediate.u(limb_len), - ); + try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[2].to32(), .u(limb_len)); const no_overflow = try self.asmJccReloc(.nb, undefined); if (slow_inc) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32()); } @@ -4691,7 +5006,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { .scale = .@"8", .disp = lhs_mcv.load_frame.off - 8, } }, - }, Immediate.u(0)); + }, .u(0)); _ = try self.asmJccReloc(.z, overflow_loop); self.performReloc(overflow); try self.asmMemoryImmediate(.{ ._, .mov }, .{ @@ -4701,20 +5016,16 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { .disp = dst_mcv.load_frame.off + @as(i32, @intCast(tuple_ty.structFieldOffset(1, zcu))), } }, - }, Immediate.u(1)); + }, .u(1)); self.performReloc(no_overflow); self.performReloc(skip_inner); if (slow_inc) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); } - try self.asmRegisterImmediate( - .{ ._, .cmp }, - temp_regs[0].to32(), - Immediate.u(limb_len), - ); + try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, outer_loop); break :result dst_mcv; @@ -4750,7 +5061,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { try self.asmMemoryImmediate( .{ ._, .cmp }, try overflow.mem(self, self.memSize(Type.c_int)), - Immediate.s(0), + .s(0), ); try self.genSetMem( .{ .frame = dst_mcv.load_frame.index }, @@ -5038,7 +5349,7 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa try self.asmRegisterImmediate( .{ ._r, .sa }, registerAlias(divisor, abi_size), - Immediate.u(int_info.bits - 1), + .u(int_info.bits - 1), ); try self.asmRegisterRegister( .{ ._, .@"test" }, @@ -5217,8 +5528,8 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void { defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); - const shift_imm = - Immediate.u(@intCast(Value.fromInterned(rhs_elem).toUnsignedInt(zcu))); + const shift_imm: Immediate = + .u(@intCast(Value.fromInterned(rhs_elem).toUnsignedInt(zcu))); if (self.hasFeature(.avx)) try self.asmRegisterRegisterImmediate( mir_tag, registerAlias(dst_reg, abi_size), @@ -5434,7 +5745,7 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { break :result operand; } - const err_off = errUnionErrorOffset(payload_ty, zcu); + const err_off = codegen.errUnionErrorOffset(payload_ty, zcu); 
switch (operand) { .register => |reg| { // TODO reuse operand @@ -5492,7 +5803,7 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { const eu_ty = src_ty.childType(zcu); const pl_ty = eu_ty.errorUnionPayload(zcu); const err_ty = eu_ty.errorUnionSet(zcu); - const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, zcu)); + const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu)); try self.asmRegisterMemory( .{ ._, .mov }, @@ -5535,7 +5846,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const eu_ty = src_ty.childType(zcu); const pl_ty = eu_ty.errorUnionPayload(zcu); const err_ty = eu_ty.errorUnionSet(zcu); - const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, zcu)); + const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu)); try self.asmMemoryImmediate( .{ ._, .mov }, @@ -5546,7 +5857,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { .disp = err_off, } }, }, - Immediate.u(0), + .u(0), ); if (self.liveness.isUnused(inst)) break :result .unreach; @@ -5559,7 +5870,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, zcu)); + const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); try self.asmRegisterMemory( .{ ._, .lea }, @@ -5587,7 +5898,7 @@ fn genUnwrapErrUnionPayloadMir( const result: MCValue = result: { if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; - const payload_off: u31 = @intCast(errUnionPayloadOffset(payload_ty, zcu)); + const payload_off: u31 = @intCast(codegen.errUnionPayloadOffset(payload_ty, zcu)); switch (err_union) { .load_frame => |frame_addr| break :result .{ .load_frame = .{ .index = frame_addr.index, @@ -5636,7 +5947,7 @@ fn genUnwrapErrUnionPayloadPtrMir( const payload_ty = err_union_ty.errorUnionPayload(zcu); const result: MCValue = result: { - const payload_off = errUnionPayloadOffset(payload_ty, zcu); + const payload_off = codegen.errUnionPayloadOffset(payload_ty, zcu); const result_mcv: MCValue = if (maybe_inst) |inst| try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv) else @@ -5696,7 +6007,7 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterImmediate( .{ ._s, .bt }, opt_reg, - Immediate.u(@as(u6, @intCast(pl_abi_size * 8))), + .u(@as(u6, @intCast(pl_abi_size * 8))), ); }, @@ -5709,7 +6020,7 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { .disp = frame_addr.off + pl_abi_size, } }, }, - Immediate.u(1), + .u(1), ), } } @@ -5733,8 +6044,8 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .{ .immediate = 0 }; const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, zcu)); - const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, zcu)); - const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, zcu)); + const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); + const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand, .{}); try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate 
= 0 }, .{}); break :result .{ .load_frame = .{ .index = frame_index } }; @@ -5756,8 +6067,8 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result try self.resolveInst(ty_op.operand); const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, zcu)); - const pl_off: i32 = @intCast(errUnionPayloadOffset(pl_ty, zcu)); - const err_off: i32 = @intCast(errUnionErrorOffset(pl_ty, zcu)); + const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); + const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef, .{}); const operand = try self.resolveInst(ty_op.operand); try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand, .{}); @@ -5770,11 +6081,20 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const result = result: { const src_mcv = try self.resolveInst(ty_op.operand); - if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; + const ptr_mcv: MCValue = switch (src_mcv) { + .register_pair => |regs| .{ .register = regs[0] }, + else => src_mcv, + }; + if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { + switch (src_mcv) { + .register_pair => |regs| try self.freeValue(.{ .register = regs[1] }), + else => {}, + } + break :result ptr_mcv; + } const dst_mcv = try self.allocRegOrMem(inst, true); - const dst_ty = self.typeOfIndex(inst); - try self.genCopy(dst_ty, dst_mcv, src_mcv, .{}); + try self.genCopy(self.typeOfIndex(inst), dst_mcv, ptr_mcv, .{}); break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -5782,23 +6102,28 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const result: MCValue = result: { + const result = result: { const src_mcv = try self.resolveInst(ty_op.operand); - switch (src_mcv) { - .load_frame => |frame_addr| { - const len_mcv: MCValue = .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 8, - } }; - if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result len_mcv; - - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(Type.usize, dst_mcv, len_mcv, .{}); - break :result dst_mcv; - }, + const len_mcv: MCValue = switch (src_mcv) { + .register_pair => |regs| .{ .register = regs[1] }, + .load_frame => |frame_addr| .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + 8, + } }, else => return self.fail("TODO implement slice_len for {}", .{src_mcv}), + }; + if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { + switch (src_mcv) { + .register_pair => |regs| try self.freeValue(.{ .register = regs[0] }), + .load_frame => {}, + else => unreachable, + } + break :result len_mcv; } + + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(self.typeOfIndex(inst), dst_mcv, len_mcv, .{}); + break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -6296,27 +6621,27 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const src_bits: u31 = @intCast(src_ty.bitSize(zcu)); const has_lzcnt = self.hasFeature(.lzcnt); if (src_bits > @as(u32, if (has_lzcnt) 128 else 64)) { - const limbs_len = math.divCeil(u32, abi_size, 8) catch unreachable; + const 
limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; const extra_bits = abi_size * 8 - src_bits; const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); const index_lock = self.register_manager.lockRegAssumeUnused(index_reg); defer self.register_manager.unlockReg(index_lock); - try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), Immediate.u(limbs_len)); + try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .u(limbs_len)); switch (extra_bits) { 1 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()), else => try self.asmRegisterImmediate( .{ ._, .mov }, dst_reg.to32(), - Immediate.s(@as(i32, extra_bits) - 1), + .s(@as(i32, extra_bits) - 1), ), } const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); try self.asmRegisterRegister(.{ ._, .@"test" }, index_reg.to32(), index_reg.to32()); const zero = try self.asmJccReloc(.z, undefined); if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .sub }, index_reg.to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .sub }, index_reg.to32(), .u(1)); } else { try self.asmRegister(.{ ._, .dec }, index_reg.to32()); } @@ -6328,7 +6653,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { .scale = .@"8", .disp = src_mcv.load_frame.off, } }, - }, Immediate.u(0)); + }, .u(0)); _ = try self.asmJccReloc(.e, loop); try self.asmRegisterMemory(.{ ._, .bsr }, dst_reg.to64(), .{ .base = .{ .frame = src_mcv.load_frame.index }, @@ -6340,9 +6665,9 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { } }, }); self.performReloc(zero); - try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), Immediate.u(6)); + try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6)); try self.asmRegisterRegister(.{ ._, .add }, index_reg.to32(), dst_reg.to32()); - try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), Immediate.u(src_bits - 1)); + try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), .u(src_bits - 1)); try self.asmRegisterRegister(.{ ._, .sub }, dst_reg.to32(), index_reg.to32()); break :result dst_mcv; } @@ -6404,7 +6729,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { assert(src_bits <= 64); const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2); - if (math.isPowerOfTwo(src_bits)) { + if (std.math.isPowerOfTwo(src_bits)) { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits ^ (src_bits - 1), }); @@ -6429,7 +6754,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 }); } else { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ - .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - self.regBitSize(dst_ty)), + .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - self.regBitSize(dst_ty)), }); const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); defer self.register_manager.unlockReg(imm_lock); @@ -6493,30 +6818,30 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { const src_bits: u31 = @intCast(src_ty.bitSize(zcu)); const has_bmi = self.hasFeature(.bmi); if (src_bits > @as(u32, if (has_bmi) 128 else 64)) { - const limbs_len = math.divCeil(u32, abi_size, 8) catch unreachable; + const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; const extra_bits = abi_size * 8 - src_bits; const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); const index_lock = 
self.register_manager.lockRegAssumeUnused(index_reg); defer self.register_manager.unlockReg(index_lock); - try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), Immediate.s(-1)); + try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .s(-1)); switch (extra_bits) { 0 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()), 1 => try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to32(), dst_reg.to32()), else => try self.asmRegisterImmediate( .{ ._, .mov }, dst_reg.to32(), - Immediate.s(-@as(i32, extra_bits)), + .s(-@as(i32, extra_bits)), ), } const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, index_reg.to32()); } - try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), Immediate.u(limbs_len)); + try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len)); const zero = try self.asmJccReloc(.nb, undefined); try self.asmMemoryImmediate(.{ ._, .cmp }, .{ .base = .{ .frame = src_mcv.load_frame.index }, @@ -6526,7 +6851,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { .scale = .@"8", .disp = src_mcv.load_frame.off, } }, - }, Immediate.u(0)); + }, .u(0)); _ = try self.asmJccReloc(.e, loop); try self.asmRegisterMemory(.{ ._, .bsf }, dst_reg.to64(), .{ .base = .{ .frame = src_mcv.load_frame.index }, @@ -6538,7 +6863,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { } }, }); self.performReloc(zero); - try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), Immediate.u(6)); + try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6)); try self.asmRegisterRegister(.{ ._, .add }, dst_reg.to32(), index_reg.to32()); break :result dst_mcv; } @@ -6558,7 +6883,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { .{ ._, .@"or" }, wide_ty, tmp_mcv, - .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(64 - extra_bits)) << + .{ .immediate = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - extra_bits)) << @intCast(src_bits) }, ); break :masked tmp_mcv; @@ -6585,7 +6910,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { .{ ._, .@"or" }, Type.u64, dst_mcv, - .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(src_bits - 64) }, + .{ .immediate = @as(u64, std.math.maxInt(u64)) << @intCast(src_bits - 64) }, ); break :masked dst_mcv; } else hi_mat_src_mcv; @@ -6602,7 +6927,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { const width_lock = self.register_manager.lockRegAssumeUnused(width_reg); defer self.register_manager.unlockReg(width_lock); - if (src_bits <= 8 or !math.isPowerOfTwo(src_bits)) { + if (src_bits <= 8 or !std.math.isPowerOfTwo(src_bits)) { const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); defer self.register_manager.unlockReg(wide_lock); @@ -6701,11 +7026,11 @@ fn genPopCount( }, ); - const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8); - const imm_0_1 = Immediate.u(mask / 0b1_1); - const imm_00_11 = Immediate.u(mask / 0b01_01); - const imm_0000_1111 = Immediate.u(mask / 0b0001_0001); - const imm_0000_0001 = Immediate.u(mask / 0b1111_1111); + const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8); + const imm_0_1: Immediate = .u(mask / 0b1_1); + const imm_00_11: Immediate = .u(mask / 0b01_01); + 
const imm_0000_1111: Immediate = .u(mask / 0b0001_0001); + const imm_0000_0001: Immediate = .u(mask / 0b1111_1111); const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); @@ -6722,7 +7047,7 @@ fn genPopCount( // dst = operand try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = operand - try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1)); // tmp = operand >> 1 if (src_abi_size > 4) { try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); @@ -6733,7 +7058,7 @@ fn genPopCount( // dst = temp1 = operand - ((operand >> 1) & 0x55...55) try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp1 - try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2)); // dst = temp1 >> 2 if (src_abi_size > 4) { try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); @@ -6749,7 +7074,7 @@ fn genPopCount( // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33) try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); // dst = temp2 - try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(4)); // tmp = temp2 >> 4 try self.asmRegisterRegister(.{ ._, .add }, dst, tmp); // dst = temp2 + (temp2 >> 4) @@ -6767,7 +7092,7 @@ fn genPopCount( // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f // dst = temp3 * 0x01...01 if (src_abi_size > 1) { - try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u((src_abi_size - 1) * 8)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u((src_abi_size - 1) * 8)); } // dst = (temp3 * 0x01...01) >> (bits - 8) } @@ -6847,7 +7172,7 @@ fn genByteSwap( return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } }; }, else => { - const limbs_len = math.divCeil(u32, abi_size, 8) catch unreachable; + const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; const temp_regs = try self.register_manager.allocRegs(4, .{null} ** 4, abi.RegisterClass.gp); @@ -6856,11 +7181,7 @@ fn genByteSwap( const dst_mcv = try self.allocRegOrMem(inst, false); try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32()); - try self.asmRegisterImmediate( - .{ ._, .mov }, - temp_regs[1].to32(), - Immediate.u(limbs_len - 1), - ); + try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), .u(limbs_len - 1)); const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); try self.asmRegisterMemory( @@ -6912,8 +7233,8 @@ fn genByteSwap( } }, }, temp_regs[2].to64()); if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), Immediate.u(1)); - try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); + try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32()); @@ -6994,10 +7315,10 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { else undefined; - const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - limb_abi_size * 8); - const imm_0000_1111 = Immediate.u(mask / 0b0001_0001); - const imm_00_11 = Immediate.u(mask / 0b01_01); - const imm_0_1 = Immediate.u(mask / 0b1_1); + const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 
- limb_abi_size * 8); + const imm_0000_1111: Immediate = .u(mask / 0b0001_0001); + const imm_00_11: Immediate = .u(mask / 0b01_01); + const imm_0_1: Immediate = .u(mask / 0b1_1); for (dst_mcv.getRegs()) |dst_reg| { const dst = registerAlias(dst_reg, limb_abi_size); @@ -7005,7 +7326,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { // dst = temp1 = bswap(operand) try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp1 - try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(4)); // dst = temp1 >> 4 if (limb_abi_size > 4) { try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); @@ -7017,13 +7338,13 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { } // tmp = temp1 & 0x0F...0F // dst = (temp1 >> 4) & 0x0F...0F - try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, .u(4)); // tmp = (temp1 & 0x0F...0F) << 4 try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp); // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4) try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp2 - try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2)); // dst = temp2 >> 2 if (limb_abi_size > 4) { try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); @@ -7050,7 +7371,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2) try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); // dst = temp3 - try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1)); // tmp = temp3 >> 1 if (limb_abi_size > 4) { try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); @@ -7337,7 +7658,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro dst_alias, dst_alias, try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), - Immediate.u(@as(u5, @bitCast(mode))), + .u(@as(u5, @bitCast(mode))), ) else try self.asmRegisterRegisterRegisterImmediate( mir_tag, dst_alias, @@ -7346,13 +7667,13 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro src_mcv.getReg().? else try self.copyToTmpRegister(ty, src_mcv), abi_size), - Immediate.u(@as(u5, @bitCast(mode))), + .u(@as(u5, @bitCast(mode))), ), else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( mir_tag, dst_alias, try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), - Immediate.u(@as(u5, @bitCast(mode))), + .u(@as(u5, @bitCast(mode))), ) else try self.asmRegisterRegisterImmediate( mir_tag, dst_alias, @@ -7360,7 +7681,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro src_mcv.getReg().? 
else try self.copyToTmpRegister(ty, src_mcv), abi_size), - Immediate.u(@as(u5, @bitCast(mode))), + .u(@as(u5, @bitCast(mode))), ), } } @@ -7433,7 +7754,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(tmp_lock); try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]); - try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, Immediate.u(63)); + try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63)); try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[0], tmp_reg); try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[1], tmp_reg); try self.asmRegisterRegister(.{ ._, .sub }, dst_regs[0], tmp_reg); @@ -7443,7 +7764,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { }, else => { const abi_size: u31 = @intCast(ty.abiSize(zcu)); - const limb_len = math.divCeil(u31, abi_size, 8) catch unreachable; + const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable; const tmp_regs = try self.register_manager.allocRegs(3, .{null} ** 3, abi.RegisterClass.gp); @@ -7460,7 +7781,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { try self.asmMemoryImmediate( .{ ._, .cmp }, try dst_mcv.address().offset((limb_len - 1) * 8).deref().mem(self, .qword), - Immediate.u(0), + .u(0), ); const positive = try self.asmJccReloc(.ns, undefined); @@ -7469,7 +7790,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { const neg_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[2].to32(), tmp_regs[2].to32()); - try self.asmRegisterImmediate(.{ ._r, .sh }, tmp_regs[1].to8(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp_regs[1].to8(), .u(1)); try self.asmRegisterMemory(.{ ._, .sbb }, tmp_regs[2].to64(), .{ .base = .{ .frame = dst_mcv.load_frame.index }, .mod = .{ .rm = .{ @@ -7491,11 +7812,11 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { }, tmp_regs[2].to64()); if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .add }, tmp_regs[0].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, tmp_regs[0].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, tmp_regs[0].to32()); } - try self.asmRegisterImmediate(.{ ._, .cmp }, tmp_regs[0].to32(), Immediate.u(limb_len)); + try self.asmRegisterImmediate(.{ ._, .cmp }, tmp_regs[0].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, neg_loop); self.performReloc(positive); @@ -7620,7 +7941,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { .{ .v_, .cvtps2ph }, dst_reg, dst_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); break :result dst_mcv; }, @@ -7650,7 +7971,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { .{ .v_, .cvtps2ph }, dst_reg, dst_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); break :result dst_mcv; }, @@ -7675,7 +7996,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { .{ .v_, .cvtps2ph }, dst_reg, wide_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); break :result dst_mcv; }, @@ -7699,9 +8020,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { else => unreachable, }, else => unreachable, - }) orelse return self.fail("TODO implement airSqrt for {}", .{ - ty.fmt(pt), - }); + }) orelse return self.fail("TODO implement airSqrt for {}", .{ty.fmt(pt)}); switch (mir_tag[0]) { .v_ss, .v_sd => if 
(src_mcv.isMemory()) try self.asmRegisterRegisterMemory( mir_tag, @@ -7805,7 +8124,7 @@ fn reuseOperandAdvanced( } // Prevent the operand deaths processing code from deallocating it. - self.liveness.clearOperandDeath(inst, op_index); + self.reused_operands.set(op_index); const op_inst = operand.toIndex().?; self.getResolvedInstValue(op_inst).reuse(self, maybe_tracked_inst, op_inst); @@ -7890,7 +8209,7 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn } }, }); try self.spillEflagsIfOccupied(); - try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(val_bit_off)); + try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(val_bit_off)); } else { const tmp_reg = registerAlias(try self.register_manager.allocReg(null, abi.RegisterClass.gp), val_abi_size); @@ -7913,12 +8232,7 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn } }, }); try self.spillEflagsIfOccupied(); - try self.asmRegisterRegisterImmediate( - .{ ._rd, .sh }, - dst_alias, - tmp_reg, - Immediate.u(val_bit_off), - ); + try self.asmRegisterRegisterImmediate(.{ ._rd, .sh }, dst_alias, tmp_reg, .u(val_bit_off)); } if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg); @@ -8064,16 +8378,16 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In } }, }; - const part_mask = (@as(u64, math.maxInt(u64)) >> @intCast(64 - part_bit_size)) << + const part_mask = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - part_bit_size)) << @intCast(part_bit_off); - const part_mask_not = part_mask ^ (@as(u64, math.maxInt(u64)) >> @intCast(64 - limb_abi_bits)); + const part_mask_not = part_mask ^ (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_bits)); if (limb_abi_size <= 4) { - try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.u(part_mask_not)); - } else if (math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| { - try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.s(small)); + try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .u(part_mask_not)); + } else if (std.math.cast(i32, @as(i64, @bitCast(part_mask_not)))) |small| { + try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, .s(small)); } else { const part_mask_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, Immediate.u(part_mask_not)); + try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, .u(part_mask_not)); try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg); } @@ -8209,25 +8523,14 @@ fn airStructFieldPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none }); } -fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void { +fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, field_index: u8) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result = try self.fieldPtr(inst, ty_op.operand, index); + const result = try self.fieldPtr(inst, ty_op.operand, field_index); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue { - const pt = self.pt; - const zcu = pt.zcu; +fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, field_index: u32) !MCValue { const ptr_field_ty = self.typeOfIndex(inst); - const ptr_container_ty = self.typeOf(operand); - const container_ty = 
ptr_container_ty.childType(zcu); - - const field_off: i32 = switch (container_ty.containerLayout(zcu)) { - .auto, .@"extern" => @intCast(container_ty.structFieldOffset(index, zcu)), - .@"packed" => @divExact(@as(i32, ptr_container_ty.ptrInfo(zcu).packed_offset.bit_offset) + - (if (zcu.typeToStruct(container_ty)) |struct_obj| pt.structPackedFieldBitOffset(struct_obj, index) else 0) - - ptr_field_ty.ptrInfo(zcu).packed_offset.bit_offset, 8), - }; const src_mcv = try self.resolveInst(operand); const dst_mcv = if (switch (src_mcv) { @@ -8235,7 +8538,19 @@ fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32 .register, .register_offset => self.reuseOperand(inst, operand, 0, src_mcv), else => false, }) src_mcv else try self.copyToRegisterWithInstTracking(inst, ptr_field_ty, src_mcv); - return dst_mcv.offset(field_off); + return dst_mcv.offset(self.fieldOffset(self.typeOf(operand), ptr_field_ty, field_index)); +} + +fn fieldOffset(self: *Self, ptr_agg_ty: Type, ptr_field_ty: Type, field_index: u32) i32 { + const pt = self.pt; + const zcu = pt.zcu; + const agg_ty = ptr_agg_ty.childType(zcu); + return switch (agg_ty.containerLayout(zcu)) { + .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(field_index, zcu)), + .@"packed" => @divExact(@as(i32, ptr_agg_ty.ptrInfo(zcu).packed_offset.bit_offset) + + (if (zcu.typeToStruct(agg_ty)) |struct_obj| pt.structPackedFieldBitOffset(struct_obj, field_index) else 0) - + ptr_field_ty.ptrInfo(zcu).packed_offset.bit_offset, 8), + }; } fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { @@ -8476,7 +8791,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } }, }); try self.spillEflagsIfOccupied(); - try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(field_bit_off)); + try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, .u(field_bit_off)); } else { const tmp_reg = registerAlias( try self.register_manager.allocReg(null, abi.RegisterClass.gp), @@ -8509,7 +8824,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { .{ ._rd, .sh }, dst_alias, tmp_reg, - Immediate.u(field_bit_off), + .u(field_bit_off), ); } @@ -8528,27 +8843,17 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; - const inst_ty = self.typeOfIndex(inst); - const parent_ty = inst_ty.childType(zcu); - const field_off: i32 = switch (parent_ty.containerLayout(zcu)) { - .auto, .@"extern" => @intCast(parent_ty.structFieldOffset(extra.field_index, zcu)), - .@"packed" => @divExact(@as(i32, inst_ty.ptrInfo(zcu).packed_offset.bit_offset) + - (if (zcu.typeToStruct(parent_ty)) |struct_obj| pt.structPackedFieldBitOffset(struct_obj, extra.field_index) else 0) - - self.typeOf(extra.field_ptr).ptrInfo(zcu).packed_offset.bit_offset, 8), - }; - + const ptr_agg_ty = self.typeOfIndex(inst); const src_mcv = try self.resolveInst(extra.field_ptr); const dst_mcv = if (src_mcv.isRegisterOffset() and self.reuseOperand(inst, extra.field_ptr, 0, src_mcv)) src_mcv else - try self.copyToRegisterWithInstTracking(inst, inst_ty, src_mcv); - const result = dst_mcv.offset(-field_off); + try self.copyToRegisterWithInstTracking(inst, ptr_agg_ty, src_mcv); + const result = dst_mcv.offset(-self.fieldOffset(ptr_agg_ty, self.typeOf(extra.field_ptr), extra.field_index)); return 
self.finishAir(inst, result, .{ extra.field_ptr, .none, .none }); } @@ -8613,7 +8918,7 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: }; if (int_info.signedness == .unsigned and self.regExtraBits(limb_ty) > 0) { - const mask = @as(u64, math.maxInt(u64)) >> @intCast(64 - limb_bits); + const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_bits); try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask }); } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv); } @@ -8698,7 +9003,7 @@ fn genShiftBinOpMir( try self.spillEflagsIfOccupied(); if (abi_size > 16) { - const limbs_len = math.divCeil(u32, abi_size, 8) catch unreachable; + const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; assert(shift_abi_size >= 1 and shift_abi_size <= 2); const rcx_lock: ?RegisterLock = switch (rhs_mcv) { @@ -8725,12 +9030,12 @@ fn genShiftBinOpMir( switch (tag[0]) { ._l => { - try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), Immediate.u(limbs_len - 1)); + try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), .u(limbs_len - 1)); switch (rhs_mcv) { .immediate => |shift_imm| try self.asmRegisterImmediate( .{ ._, .mov }, temp_regs[0].to32(), - Immediate.u(limbs_len - (shift_imm >> 6) - 1), + .u(limbs_len - (shift_imm >> 6) - 1), ), else => { try self.asmRegisterRegister( @@ -8738,16 +9043,8 @@ fn genShiftBinOpMir( temp_regs[2].to32(), registerAlias(.rcx, shift_abi_size), ); - try self.asmRegisterImmediate( - .{ ._, .@"and" }, - .cl, - Immediate.u(math.maxInt(u6)), - ); - try self.asmRegisterImmediate( - .{ ._r, .sh }, - temp_regs[2].to32(), - Immediate.u(6), - ); + try self.asmRegisterImmediate(.{ ._, .@"and" }, .cl, .u(std.math.maxInt(u6))); + try self.asmRegisterImmediate(.{ ._r, .sh }, temp_regs[2].to32(), .u(6)); try self.asmRegisterRegister( .{ ._, .mov }, temp_regs[0].to32(), @@ -8767,7 +9064,7 @@ fn genShiftBinOpMir( .immediate => |shift_imm| try self.asmRegisterImmediate( .{ ._, .mov }, temp_regs[0].to32(), - Immediate.u(shift_imm >> 6), + .u(shift_imm >> 6), ), else => { try self.asmRegisterRegister( @@ -8775,16 +9072,8 @@ fn genShiftBinOpMir( temp_regs[0].to32(), registerAlias(.rcx, shift_abi_size), ); - try self.asmRegisterImmediate( - .{ ._, .@"and" }, - .cl, - Immediate.u(math.maxInt(u6)), - ); - try self.asmRegisterImmediate( - .{ ._r, .sh }, - temp_regs[0].to32(), - Immediate.u(6), - ); + try self.asmRegisterImmediate(.{ ._, .@"and" }, .cl, .u(std.math.maxInt(u6))); + try self.asmRegisterImmediate(.{ ._r, .sh }, temp_regs[0].to32(), .u(6)); }, } }, @@ -8813,7 +9102,7 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate( .{ ._, .cmp }, temp_regs[0].to32(), - Immediate.u(limbs_len - 1), + .u(limbs_len - 1), ); break :skip try self.asmJccReloc(.nb, undefined); }, @@ -8843,7 +9132,7 @@ fn genShiftBinOpMir( }, .sh }, temp_regs[2].to64(), temp_regs[3].to64(), - Immediate.u(shift_imm & math.maxInt(u6)), + .u(shift_imm & std.math.maxInt(u6)), ), else => try self.asmRegisterRegisterRegister(.{ switch (tag[0]) { ._l => ._ld, @@ -8864,8 +9153,8 @@ fn genShiftBinOpMir( switch (tag[0]) { ._l => { if (slow_inc_dec) { - try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), Immediate.u(1)); - try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[0].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); + try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[0].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .dec }, 
temp_regs[1].to32()); try self.asmRegister(.{ ._, .dec }, temp_regs[0].to32()); @@ -8874,8 +9163,8 @@ fn genShiftBinOpMir( }, ._r => { if (slow_inc_dec) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), Immediate.u(1)); - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32()); try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); @@ -8883,7 +9172,7 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate( .{ ._, .cmp }, temp_regs[0].to32(), - Immediate.u(limbs_len - 1), + .u(limbs_len - 1), ); _ = try self.asmJccReloc(.b, loop); }, @@ -8898,7 +9187,7 @@ fn genShiftBinOpMir( .immediate => |shift_imm| try self.asmRegisterImmediate( tag, temp_regs[2].to64(), - Immediate.u(shift_imm & math.maxInt(u6)), + .u(shift_imm & std.math.maxInt(u6)), ), else => try self.asmRegisterRegister(tag, temp_regs[2].to64(), .cl), } @@ -8914,7 +9203,7 @@ fn genShiftBinOpMir( if (tag[0] == ._r and tag[1] == .sa) try self.asmRegisterImmediate( tag, temp_regs[2].to64(), - Immediate.u(63), + .u(63), ); if (switch (rhs_mcv) { .immediate => |shift_imm| shift_imm >> 6 > 0, @@ -8935,7 +9224,7 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate( .{ ._, .cmp }, temp_regs[1].to32(), - Immediate.u(limbs_len - 1), + .u(limbs_len - 1), ); break :skip try self.asmJccReloc(.nb, undefined); }, @@ -8945,12 +9234,12 @@ fn genShiftBinOpMir( const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); switch (tag[0]) { ._l => if (slow_inc_dec) { - try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .dec }, temp_regs[1].to32()); }, ._r => if (slow_inc_dec) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[1].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, temp_regs[1].to32()); }, @@ -8972,14 +9261,14 @@ fn genShiftBinOpMir( .scale = .@"8", .disp = lhs_mcv.load_frame.off, } }, - }, Immediate.u(0)); + }, .u(0)); switch (tag[0]) { ._l => _ = try self.asmJccReloc(.nz, loop), ._r => { try self.asmRegisterImmediate( .{ ._, .cmp }, temp_regs[1].to32(), - Immediate.u(limbs_len - 1), + .u(limbs_len - 1), ); _ = try self.asmJccReloc(.b, loop); }, @@ -9021,12 +9310,12 @@ fn genShiftBinOpMir( info.double_tag, lhs_regs[info.indices[1]], lhs_regs[info.indices[0]], - Immediate.u(shift_imm), + .u(shift_imm), ); try self.asmRegisterImmediate( tag, lhs_regs[info.indices[0]], - Immediate.u(shift_imm), + .u(shift_imm), ); return; } else { @@ -9039,7 +9328,7 @@ fn genShiftBinOpMir( if (tag[0] == ._r and tag[1] == .sa) try self.asmRegisterImmediate( tag, lhs_regs[info.indices[0]], - Immediate.u(63), + .u(63), ) else try self.asmRegisterRegister( .{ ._, .xor }, lhs_regs[info.indices[0]], @@ -9048,7 +9337,7 @@ fn genShiftBinOpMir( if (shift_imm > 64) try self.asmRegisterImmediate( tag, lhs_regs[info.indices[1]], - Immediate.u(shift_imm - 64), + .u(shift_imm - 64), ); return; }, @@ -9059,7 +9348,7 @@ fn genShiftBinOpMir( if (tag[0] == ._r and tag[1] == .sa) { try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, lhs_regs[info.indices[0]]); - try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63)); + try self.asmRegisterImmediate(tag, 
tmp_reg, .u(63)); } else try self.asmRegisterRegister( .{ ._, .xor }, tmp_reg.to32(), @@ -9076,11 +9365,7 @@ fn genShiftBinOpMir( lhs_regs[info.indices[0]], registerAlias(shift_reg, 1), ); - try self.asmRegisterImmediate( - .{ ._, .cmp }, - registerAlias(shift_reg, 1), - Immediate.u(64), - ); + try self.asmRegisterImmediate(.{ ._, .cmp }, registerAlias(shift_reg, 1), .u(64)); try self.asmCmovccRegisterRegister( .ae, lhs_regs[info.indices[1]], @@ -9119,7 +9404,7 @@ fn genShiftBinOpMir( } }, }, tmp_reg, - Immediate.u(shift_imm), + .u(shift_imm), ); try self.asmMemoryImmediate( tag, @@ -9130,7 +9415,7 @@ fn genShiftBinOpMir( .disp = dst_frame_addr.off + info.indices[0] * 8, } }, }, - Immediate.u(shift_imm), + .u(shift_imm), ); return; } else { @@ -9149,7 +9434,7 @@ fn genShiftBinOpMir( if (shift_imm > 64) try self.asmRegisterImmediate( tag, tmp_reg, - Immediate.u(shift_imm - 64), + .u(shift_imm - 64), ); try self.asmMemoryRegister( .{ ._, .mov }, @@ -9171,7 +9456,7 @@ fn genShiftBinOpMir( .disp = dst_frame_addr.off + info.indices[0] * 8, } }, }, - Immediate.u(63), + .u(63), ) else { try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32()); try self.asmMemoryRegister( @@ -9223,7 +9508,7 @@ fn genShiftBinOpMir( ); if (tag[0] == ._r and tag[1] == .sa) { try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg); - try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63)); + try self.asmRegisterImmediate(tag, tmp_reg, .u(63)); } else try self.asmRegisterRegister( .{ ._, .xor }, tmp_reg.to32(), @@ -9239,7 +9524,7 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate( .{ ._, .cmp }, registerAlias(shift_reg, 1), - Immediate.u(64), + .u(64), ); try self.asmCmovccRegisterRegister(.ae, second_reg, first_reg); try self.asmCmovccRegisterRegister(.ae, first_reg, tmp_reg); @@ -9277,7 +9562,7 @@ fn genShiftBinOpMir( .immediate => |shift_imm| return self.asmRegisterImmediate( tag, registerAlias(lhs_reg, abi_size), - Immediate.u(shift_imm), + .u(shift_imm), ), .register => |shift_reg| return self.asmRegisterRegister( tag, @@ -9292,7 +9577,7 @@ fn genShiftBinOpMir( .base = .{ .reg = .ds }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size), - .disp = math.cast(i32, @as(i64, @bitCast(addr))) orelse + .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{ @tagName(lhs_mcv), @tagName(shift_mcv), @@ -9316,11 +9601,7 @@ fn genShiftBinOpMir( else => unreachable, }; switch (shift_mcv) { - .immediate => |shift_imm| return self.asmMemoryImmediate( - tag, - lhs_mem, - Immediate.u(shift_imm), - ), + .immediate => |shift_imm| return self.asmMemoryImmediate(tag, lhs_mem, .u(shift_imm)), .register => |shift_reg| return self.asmMemoryRegister( tag, lhs_mem, @@ -9495,7 +9776,7 @@ fn genMulDivBinOp( switch (tag) { .mul, .mul_wrap => { const slow_inc = self.hasFeature(.slow_incdec); - const limb_len = math.divCeil(u32, src_abi_size, 8) catch unreachable; + const limb_len = std.math.divCeil(u32, src_abi_size, 8) catch unreachable; try self.spillRegisters(&.{ .rax, .rcx, .rdx }); const reg_locks = self.register_manager.lockRegs(3, .{ .rax, .rcx, .rdx }); @@ -9536,7 +9817,7 @@ fn genMulDivBinOp( try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx); const inner_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, .u(1)); try self.asmMemoryRegister(.{ ._, .adc }, .{ .base = .{ .frame = 
dst_mcv.load_frame.index }, .mod = .{ .rm = .{ @@ -9559,7 +9840,7 @@ fn genMulDivBinOp( }); try self.asmRegister(.{ ._, .mul }, temp_regs[1].to64()); - try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, .u(1)); try self.asmMemoryRegister(.{ ._, .adc }, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .mod = .{ .rm = .{ @@ -9572,30 +9853,22 @@ fn genMulDivBinOp( try self.asmSetccRegister(.c, .ch); if (slow_inc) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), Immediate.u(1)); - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, temp_regs[2].to32()); try self.asmRegister(.{ ._, .inc }, temp_regs[3].to32()); } - try self.asmRegisterImmediate( - .{ ._, .cmp }, - temp_regs[3].to32(), - Immediate.u(limb_len), - ); + try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, inner_loop); self.performReloc(skip_inner); if (slow_inc) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, temp_regs[0].to32()); } - try self.asmRegisterImmediate( - .{ ._, .cmp }, - temp_regs[0].to32(), - Immediate.u(limb_len), - ); + try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len)); _ = try self.asmJccReloc(.b, outer_loop); return dst_mcv; @@ -9911,7 +10184,7 @@ fn genBinOp( dst_reg, dst_reg, try rhs_mcv.mem(self, .word), - Immediate.u(1), + .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, dst_reg, @@ -9969,7 +10242,7 @@ fn genBinOp( .{ .v_, .cvtps2ph }, dst_reg, dst_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); break :adjusted .{ .register = dst_reg }; }, @@ -10278,7 +10551,7 @@ fn genBinOp( .lea_tlv, .lea_frame, => true, - .memory => |addr| math.cast(i32, @as(i64, @bitCast(addr))) == null, + .memory => |addr| std.math.cast(i32, @as(i64, @bitCast(addr))) == null, else => false, .register_pair, .register_overflow, @@ -10410,7 +10683,7 @@ fn genBinOp( dst_reg, dst_reg, try src_mcv.mem(self, .word), - Immediate.u(1), + .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, dst_reg, @@ -10442,7 +10715,7 @@ fn genBinOp( dst_reg, dst_reg, dst_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ + .u(@as(u5, @bitCast(RoundMode{ .mode = switch (air_tag) { .div_trunc => .zero, .div_floor => .down, @@ -10457,7 +10730,7 @@ fn genBinOp( .{ .v_, .cvtps2ph }, dst_reg, dst_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); return dst_mcv; }, @@ -10856,7 +11129,7 @@ fn genBinOp( dst_reg, dst_reg, try src_mcv.mem(self, .word), - Immediate.u(1), + .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, dst_reg, @@ -10886,7 +11159,7 @@ fn genBinOp( .{ .v_, .cvtps2ph }, dst_reg, dst_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); return dst_mcv; }, @@ -10902,7 +11175,7 @@ fn genBinOp( .{ .vp_d, .insr }, dst_reg, try src_mcv.mem(self, .dword), - Immediate.u(1), + .u(1), ) else try 
self.asmRegisterRegisterRegister( .{ .v_ps, .unpckl }, dst_reg, @@ -10937,7 +11210,7 @@ fn genBinOp( .{ .v_, .cvtps2ph }, dst_reg, dst_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); return dst_mcv; }, @@ -10980,7 +11253,7 @@ fn genBinOp( .{ .v_, .cvtps2ph }, dst_reg, dst_reg, - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); return dst_mcv; }, @@ -11023,7 +11296,7 @@ fn genBinOp( .{ .v_, .cvtps2ph }, dst_reg, dst_reg.to256(), - Immediate.u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), + .u(@as(u5, @bitCast(RoundMode{ .mode = .mxcsr }))), ); return dst_mcv; }, @@ -11191,7 +11464,7 @@ fn genBinOp( ); }, .cmp => { - const imm = Immediate.u(switch (air_tag) { + const imm: Immediate = .u(switch (air_tag) { .cmp_eq => 0, .cmp_lt, .cmp_gt => 1, .cmp_lte, .cmp_gte => 2, @@ -11289,7 +11562,7 @@ fn genBinOp( mask_reg, rhs_copy_reg, rhs_copy_reg, - Immediate.u(3), // unord + .u(3), // unord ); try self.asmRegisterRegisterRegisterRegister( @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { @@ -11356,7 +11629,7 @@ fn genBinOp( }), mask_reg, mask_reg, - Immediate.u(if (has_blend) 3 else 7), // unord, ord + .u(if (has_blend) 3 else 7), // unord, ord ); if (has_blend) try self.asmRegisterRegisterRegister( @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { @@ -11567,29 +11840,29 @@ fn genBinOpMir( 8 => try self.asmRegisterImmediate( mir_limb_tag, dst_alias, - if (math.cast(i8, @as(i64, @bitCast(imm)))) |small| - Immediate.s(small) + if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |small| + .s(small) else - Immediate.u(@as(u8, @intCast(imm))), + .u(@as(u8, @intCast(imm))), ), 16 => try self.asmRegisterImmediate( mir_limb_tag, dst_alias, - if (math.cast(i16, @as(i64, @bitCast(imm)))) |small| - Immediate.s(small) + if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |small| + .s(small) else - Immediate.u(@as(u16, @intCast(imm))), + .u(@as(u16, @intCast(imm))), ), 32 => try self.asmRegisterImmediate( mir_limb_tag, dst_alias, - if (math.cast(i32, @as(i64, @bitCast(imm)))) |small| - Immediate.s(small) + if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| + .s(small) else - Immediate.u(@as(u32, @intCast(imm))), + .u(@as(u32, @intCast(imm))), ), - 64 => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small| - try self.asmRegisterImmediate(mir_limb_tag, dst_alias, Immediate.s(small)) + 64 => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| + try self.asmRegisterImmediate(mir_limb_tag, dst_alias, .s(small)) else try self.asmRegisterRegister(mir_limb_tag, dst_alias, registerAlias( try self.copyToTmpRegister(ty, src_mcv), @@ -11619,7 +11892,7 @@ fn genBinOpMir( .base = .{ .reg = .ds }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(limb_abi_size), - .disp = math.cast(i32, addr + off) orelse break :direct, + .disp = std.math.cast(i32, addr + off) orelse break :direct, } }, }, .indirect => |reg_off| .{ @@ -11731,8 +12004,8 @@ fn genBinOpMir( => null, .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: { switch (resolved_src_mcv) { - .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr))) != null and - math.cast(i32, @as(i64, @bitCast(addr)) + abi_size - limb_abi_size) != null) + .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr))) != null and + std.math.cast(i32, @as(i64, @bitCast(addr)) + abi_size - limb_abi_size) != null) break :src null, .load_symbol, .load_got, .load_direct, .load_tlv => {}, else => 
unreachable, @@ -11823,33 +12096,29 @@ fn genBinOpMir( 8 => try self.asmMemoryImmediate( mir_limb_tag, dst_limb_mem, - if (math.cast(i8, @as(i64, @bitCast(imm)))) |small| - Immediate.s(small) + if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |small| + .s(small) else - Immediate.u(@as(u8, @intCast(imm))), + .u(@as(u8, @intCast(imm))), ), 16 => try self.asmMemoryImmediate( mir_limb_tag, dst_limb_mem, - if (math.cast(i16, @as(i64, @bitCast(imm)))) |small| - Immediate.s(small) + if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |small| + .s(small) else - Immediate.u(@as(u16, @intCast(imm))), + .u(@as(u16, @intCast(imm))), ), 32 => try self.asmMemoryImmediate( mir_limb_tag, dst_limb_mem, - if (math.cast(i32, @as(i64, @bitCast(imm)))) |small| - Immediate.s(small) + if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| + .s(small) else - Immediate.u(@as(u32, @intCast(imm))), + .u(@as(u32, @intCast(imm))), ), - 64 => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small| - try self.asmMemoryImmediate( - mir_limb_tag, - dst_limb_mem, - Immediate.s(small), - ) + 64 => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| + try self.asmMemoryImmediate(mir_limb_tag, dst_limb_mem, .s(small)) else try self.asmMemoryRegister( mir_limb_tag, @@ -11973,12 +12242,12 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M registerAlias(src_reg, abi_size), ), .immediate => |imm| { - if (math.cast(i32, imm)) |small| { + if (std.math.cast(i32, imm)) |small| { try self.asmRegisterRegisterImmediate( .{ .i_, .mul }, dst_alias, dst_alias, - Immediate.s(small), + .s(small), ); } else { const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv); @@ -12009,7 +12278,7 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .base = .{ .reg = .ds }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size), - .disp = math.cast(i32, @as(i64, @bitCast(addr))) orelse + .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse return self.asmRegisterRegister( .{ .i_, .mul }, dst_alias, @@ -12087,11 +12356,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { ) |dst_reg, elem_index| { assert(self.register_manager.isRegFree(dst_reg)); if (elem_index > 0) { - try self.asmRegisterImmediate( - .{ ._l, .sh }, - dst_reg.to8(), - Immediate.u(elem_index), - ); + try self.asmRegisterImmediate(.{ ._l, .sh }, dst_reg.to8(), .u(elem_index)); try self.asmRegisterRegister( .{ ._, .@"or" }, dst_reg.to8(), @@ -12127,7 +12392,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterImmediate( .{ ._, .mov }, index_reg.to32(), - Immediate.u(regs_frame_addr.regs), + .u(regs_frame_addr.regs), ); const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); try self.asmMemoryImmediate(.{ ._, .cmp }, .{ @@ -12147,14 +12412,14 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { ); self.performReloc(unset); if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, index_reg.to32()); } try self.asmRegisterImmediate( .{ ._, .cmp }, index_reg.to32(), - Immediate.u(arg_ty.vectorLen(zcu)), + .u(arg_ty.vectorLen(zcu)), ); _ = try self.asmJccReloc(.b, loop); @@ -12180,7 +12445,6 @@ fn airDbgArg(self: *Self, inst: Air.Inst.Index) !void { for (self.args[self.arg_index..]) |arg| { if (arg != .none) break; } else try self.airDbgVarArgs(); - self.finishAirBookkeeping(); } fn 
airDbgVarArgs(self: *Self) !void { @@ -12200,9 +12464,9 @@ fn genLocalDebugInfo( switch (mcv) { .none => try self.asmAir(.dbg_local, inst), .unreach, .dead, .elementwise_regs_then_frame, .reserved_frame, .air_ref => unreachable, - .immediate => |imm| try self.asmAirImmediate(.dbg_local, inst, Immediate.u(imm)), + .immediate => |imm| try self.asmAirImmediate(.dbg_local, inst, .u(imm)), .lea_frame => |frame_addr| try self.asmAirFrameAddress(.dbg_local, inst, frame_addr), - .lea_symbol => |sym_off| try self.asmAirImmediate(.dbg_local, inst, Immediate.rel(sym_off)), + .lea_symbol => |sym_off| try self.asmAirImmediate(.dbg_local, inst, .rel(sym_off)), else => { const ty = switch (tag) { else => unreachable, @@ -12245,16 +12509,6 @@ fn genLocalDebugInfo( } } -fn airTrap(self: *Self) !void { - try self.asmOpOnly(.{ ._, .ud2 }); - self.finishAirBookkeeping(); -} - -fn airBreakpoint(self: *Self) !void { - try self.asmOpOnly(.{ ._, .int3 }); - self.finishAirBookkeeping(); -} - fn airRetAddr(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); try self.genCopy(Type.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } }, .{}); @@ -12426,7 +12680,7 @@ fn genCall(self: *Self, info: union(enum) { try self.asmRegisterImmediate( .{ ._, .mov }, index_reg.to32(), - Immediate.u(regs_frame_addr.regs), + .u(regs_frame_addr.regs), ); const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); try self.asmMemoryRegister(.{ ._, .bt }, src_mem, index_reg.to32()); @@ -12440,14 +12694,14 @@ fn genCall(self: *Self, info: union(enum) { } }, }); if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1)); } else { try self.asmRegister(.{ ._, .inc }, index_reg.to32()); } try self.asmRegisterImmediate( .{ ._, .cmp }, index_reg.to32(), - Immediate.u(arg_ty.vectorLen(zcu)), + .u(arg_ty.vectorLen(zcu)), ); _ = try self.asmJccReloc(.b, loop); @@ -12521,11 +12775,7 @@ fn genCall(self: *Self, info: union(enum) { 0.., ) |dst_reg, elem_index| { try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()); - try self.asmMemoryImmediate( - .{ ._, .bt }, - src_mem, - Immediate.u(elem_index), - ); + try self.asmMemoryImmediate(.{ ._, .bt }, src_mem, .u(elem_index)); try self.asmSetccRegister(.c, dst_reg.to8()); } }, @@ -12533,7 +12783,7 @@ fn genCall(self: *Self, info: union(enum) { }; if (fn_info.is_var_args) - try self.asmRegisterImmediate(.{ ._, .mov }, .al, Immediate.u(call_info.fp_count)); + try self.asmRegisterImmediate(.{ ._, .mov }, .al, .u(call_info.fp_count)); // Due to incremental compilation, how function calls are generated depends // on linking. 
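Note on the recurring Immediate.u(...) -> .u(...), Immediate.s(...) -> .s(...), and Immediate.rel(...) -> .rel(...) rewrites in the hunks above and below: they lean on Zig decl literals, where a leading dot resolves against the public declarations of the type the result is known to have, so the call site no longer needs to repeat the type name. The following is a minimal sketch of that mechanism only; the Immediate union and lowestByte helper here are stand-ins for illustration, not the backend's actual definitions.

const std = @import("std");

const Immediate = union(enum) {
    signed: i64,
    unsigned: u64,

    pub fn u(x: u64) Immediate {
        return .{ .unsigned = x };
    }

    pub fn s(x: i64) Immediate {
        return .{ .signed = x };
    }
};

fn lowestByte(imm: Immediate) u8 {
    // Only the low byte is inspected; the active field does not matter here.
    return switch (imm) {
        .unsigned => |x| @truncate(x),
        .signed => |x| @truncate(@as(u64, @bitCast(x))),
    };
}

test "decl literal shorthand" {
    // Both calls are equivalent; the second infers Immediate from the parameter type.
    try std.testing.expectEqual(lowestByte(Immediate.u(0xAA)), lowestByte(.u(0xAA)));
}

The same inference is what lets the patch write arguments like .u(1) or .s(small) wherever the assembler helpers already declare an Immediate parameter.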
@@ -12551,7 +12801,7 @@ fn genCall(self: *Self, info: union(enum) { if (self.bin_file.cast(.elf)) |elf_file| { const zo = elf_file.zigObjectPtr().?; const sym_index = try zo.getOrCreateMetadataForNav(zcu, func.owner_nav); - try self.asmImmediate(.{ ._, .call }, Immediate.rel(.{ .sym_index = sym_index })); + try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym_index })); } else if (self.bin_file.cast(.coff)) |coff_file| { const atom = try coff_file.getOrCreateAtomForNav(func.owner_nav); const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; @@ -12561,7 +12811,7 @@ fn genCall(self: *Self, info: union(enum) { const zo = macho_file.getZigObject().?; const sym_index = try zo.getOrCreateMetadataForNav(macho_file, func.owner_nav); const sym = zo.symbols.items[sym_index]; - try self.asmImmediate(.{ ._, .call }, Immediate.rel(.{ .sym_index = sym.nlist_idx })); + try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym.nlist_idx })); } else if (self.bin_file.cast(.plan9)) |p9| { const atom_index = try p9.seeNav(pt, func.owner_nav); const atom = p9.getAtom(atom_index); @@ -12579,13 +12829,13 @@ fn genCall(self: *Self, info: union(enum) { @"extern".name.toSlice(ip), @"extern".lib_name.toSlice(ip), ); - try self.asmImmediate(.{ ._, .call }, Immediate.rel(.{ .sym_index = target_sym_index })); + try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index })); } else if (self.bin_file.cast(.macho)) |macho_file| { const target_sym_index = try macho_file.getGlobalSymbol( @"extern".name.toSlice(ip), @"extern".lib_name.toSlice(ip), ); - try self.asmImmediate(.{ ._, .call }, Immediate.rel(.{ .sym_index = target_sym_index })); + try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index })); } else try self.genExternSymbolRef( .call, @"extern".lib_name.toSlice(ip), @@ -12600,10 +12850,10 @@ fn genCall(self: *Self, info: union(enum) { }, .lib => |lib| if (self.bin_file.cast(.elf)) |elf_file| { const target_sym_index = try elf_file.getGlobalSymbol(lib.callee, lib.lib); - try self.asmImmediate(.{ ._, .call }, Immediate.rel(.{ .sym_index = target_sym_index })); + try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index })); } else if (self.bin_file.cast(.macho)) |macho_file| { const target_sym_index = try macho_file.getGlobalSymbol(lib.callee, lib.lib); - try self.asmImmediate(.{ ._, .call }, Immediate.rel(.{ .sym_index = target_sym_index })); + try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = target_sym_index })); } else try self.genExternSymbolRef(.call, lib.lib, lib.callee), } return call_info.return_value.short; @@ -12665,7 +12915,7 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc); } -fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { +fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -12754,7 +13004,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { try self.asmRegisterImmediate( .{ ._r, .sh }, registerAlias(temp_lhs_reg, opt_abi_size), - Immediate.u(payload_abi_size * 8), + .u(payload_abi_size * 8), ); } @@ -12775,7 +13025,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { try self.asmRegisterImmediate( .{ ._r, .sh }, registerAlias(temp_rhs_reg, opt_abi_size), - Immediate.u(payload_abi_size * 8), + 
.u(payload_abi_size * 8), ); try self.asmRegisterRegister( .{ ._, .@"test" }, @@ -12867,10 +13117,10 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { .register_pair, .load_frame => null, .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: { switch (resolved_dst_mcv) { - .memory => |addr| if (math.cast( + .memory => |addr| if (std.math.cast( i32, @as(i64, @bitCast(addr)), - ) != null and math.cast( + ) != null and std.math.cast( i32, @as(i64, @bitCast(addr)) + abi_size - 8, ) != null) break :dst null, @@ -12928,10 +13178,10 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { .register_pair, .load_frame => null, .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: { switch (resolved_src_mcv) { - .memory => |addr| if (math.cast( + .memory => |addr| if (std.math.cast( i32, @as(i64, @bitCast(addr)), - ) != null and math.cast( + ) != null and std.math.cast( i32, @as(i64, @bitCast(addr)) + abi_size - 8, ) != null) break :src null, @@ -12971,7 +13221,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const locks = self.register_manager.lockRegsAssumeUnused(2, regs); defer for (locks) |lock| self.register_manager.unlockReg(lock); - const limbs_len = math.divCeil(u16, abi_size, 8) catch unreachable; + const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable; var limb_i: u16 = 0; while (limb_i < limbs_len) : (limb_i += 1) { const off = limb_i * 8; @@ -13067,7 +13317,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { tmp1_reg, dst_reg.to128(), try src_mcv.mem(self, .word), - Immediate.u(1), + .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, tmp1_reg, @@ -13232,7 +13482,6 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { .column = dbg_stmt.column, } }, }); - self.finishAirBookkeeping(); } fn airDbgEmptyStmt(self: *Self) !void { @@ -13240,7 +13489,6 @@ fn airDbgEmptyStmt(self: *Self) !void { self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] == .pseudo_dbg_line_stmt_line_column) self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] = .pseudo_dbg_line_line_column; try self.asmOpOnly(.{ ._, .nop }); - self.finishAirBookkeeping(); } fn airDbgInlineBlock(self: *Self, inst: Air.Inst.Index) !void { @@ -13278,7 +13526,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !Mir.Inst.Index { }, .register => |reg| { try self.spillEflagsIfOccupied(); - try self.asmRegisterImmediate(.{ ._, .@"test" }, reg.to8(), Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .@"test" }, reg.to8(), .u(1)); return self.asmJccReloc(.z, undefined); }, .immediate, @@ -13338,7 +13586,6 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { }); // We already took care of pl_op.operand earlier, so there's nothing left to do. 
- self.finishAirBookkeeping(); } fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue { @@ -13353,7 +13600,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC const pl_ty = opt_ty.optionalChild(zcu); - const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(zcu)) + const some_info: struct { off: u31, ty: Type } = if (opt_ty.optionalReprIsPayload(zcu)) .{ .off = 0, .ty = if (pl_ty.isSlice(zcu)) pl_ty.slicePtrFieldType(zcu) else pl_ty } else .{ .off = @intCast(pl_ty.abiSize(zcu)), .ty = Type.bool }; @@ -13366,7 +13613,6 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC .undef, .immediate, .eflags, - .register_pair, .register_offset, .register_overflow, .lea_direct, @@ -13396,7 +13642,25 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC try self.asmRegisterImmediate( .{ ._, .bt }, registerAlias(opt_reg, opt_abi_size), - Immediate.u(@as(u6, @intCast(some_info.off * 8))), + .u(@as(u6, @intCast(some_info.off * 8))), + ); + return .{ .eflags = .nc }; + }, + + .register_pair => |opt_regs| { + if (some_info.off == 0) { + const some_abi_size: u32 = @intCast(some_info.ty.abiSize(zcu)); + const alias_reg = registerAlias(opt_regs[0], some_abi_size); + assert(some_abi_size * 8 == alias_reg.bitSize()); + try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg); + return .{ .eflags = .z }; + } + assert(some_info.ty.ip_index == .bool_type); + const opt_abi_size: u32 = @intCast(opt_ty.abiSize(zcu)); + try self.asmRegisterImmediate( + .{ ._, .bt }, + registerAlias(opt_regs[some_info.off / 8], opt_abi_size), + .u(@as(u6, @truncate(some_info.off * 8))), ); return .{ .eflags = .nc }; }, @@ -13422,7 +13686,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC .disp = some_info.off, } }, }, - Immediate.u(0), + .u(0), ); return .{ .eflags = .e }; }, @@ -13448,7 +13712,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC }, else => unreachable, }, - Immediate.u(0), + .u(0), ); return .{ .eflags = .e }; }, @@ -13485,7 +13749,7 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) .disp = some_info.off, } }, }, - Immediate.u(0), + .u(0), ); self.eflags_inst = inst; @@ -13500,7 +13764,7 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) try self.spillEflagsIfOccupied(); - const err_off: u31 = @intCast(errUnionErrorOffset(eu_ty.errorUnionPayload(zcu), zcu)); + const err_off: u31 = @intCast(codegen.errUnionErrorOffset(eu_ty.errorUnionPayload(zcu), zcu)); switch (eu_mcv) { .register => |reg| { const eu_lock = self.register_manager.lockReg(reg); @@ -13557,7 +13821,7 @@ fn isErrPtr(self: *Self, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCV const ptr_lock = self.register_manager.lockReg(ptr_reg); defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); - const err_off: u31 = @intCast(errUnionErrorOffset(eu_ty.errorUnionPayload(zcu), zcu)); + const err_off: u31 = @intCast(codegen.errUnionErrorOffset(eu_ty.errorUnionPayload(zcu), zcu)); try self.asmMemoryImmediate( .{ ._, .cmp }, .{ @@ -13567,7 +13831,7 @@ fn isErrPtr(self: *Self, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCV .disp = err_off, } }, }, - Immediate.u(0), + .u(0), ); if (maybe_inst) |inst| self.eflags_inst = inst; @@ -13686,12 +13950,11 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { try self.loops.putNoClobber(self.gpa, inst, .{ 
.state = state, - .jmp_target = @intCast(self.mir_instructions.len), + .target = @intCast(self.mir_instructions.len), }); defer assert(self.loops.remove(inst)); try self.genBodyBlock(body); - self.finishAirBookkeeping(); } fn airBlock(self: *Self, inst: Air.Inst.Index) !void { @@ -13729,7 +13992,6 @@ fn lowerBlock(self: *Self, inst: Air.Inst.Index, body: []const Air.Inst.Index) ! const tracking = &self.inst_tracking.values()[inst_tracking_i]; if (self.liveness.isUnused(inst)) try tracking.die(self, inst); self.getValueIfFree(tracking.short, inst); - self.finishAirBookkeeping(); } fn lowerSwitchBr(self: *Self, inst: Air.Inst.Index, switch_br: Air.UnwrappedSwitch, condition: MCValue) !void { @@ -13864,7 +14126,6 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { try self.lowerSwitchBr(inst, switch_br, condition); // We already took care of pl_op.operand earlier, so there's nothing left to do - self.finishAirBookkeeping(); } fn airLoopSwitchBr(self: *Self, inst: Air.Inst.Index) !void { @@ -13893,7 +14154,7 @@ fn airLoopSwitchBr(self: *Self, inst: Air.Inst.Index) !void { try self.loops.putNoClobber(self.gpa, inst, .{ .state = state, - .jmp_target = @intCast(self.mir_instructions.len), + .target = @intCast(self.mir_instructions.len), }); defer assert(self.loops.remove(inst)); @@ -13903,7 +14164,6 @@ fn airLoopSwitchBr(self: *Self, inst: Air.Inst.Index) !void { try self.lowerSwitchBr(inst, switch_br, mat_cond); try self.processDeath(inst); - self.finishAirBookkeeping(); } fn airSwitchDispatch(self: *Self, inst: Air.Inst.Index) !void { @@ -13945,12 +14205,10 @@ fn airSwitchDispatch(self: *Self, inst: Air.Inst.Index) !void { // Emit a jump with a relocation. It will be patched up after the block ends. // Leave the jump offset undefined - _ = try self.asmJmpReloc(loop_data.jmp_target); + _ = try self.asmJmpReloc(loop_data.target); // Stop tracking block result without forgetting tracking info try self.freeValue(block_tracking.short); - - self.finishAirBookkeeping(); } fn performReloc(self: *Self, reloc: Mir.Inst.Index) void { @@ -14023,8 +14281,6 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void { // Stop tracking block result without forgetting tracking info try self.freeValue(block_tracking.short); - - self.finishAirBookkeeping(); } fn airRepeat(self: *Self, inst: Air.Inst.Index) !void { @@ -14036,8 +14292,7 @@ fn airRepeat(self: *Self, inst: Air.Inst.Index) !void { .resurrect = false, .close_scope = true, }); - _ = try self.asmJmpReloc(repeat_info.jmp_target); - self.finishAirBookkeeping(); + _ = try self.asmJmpReloc(repeat_info.target); } fn airAsm(self: *Self, inst: Air.Inst.Index) !void { @@ -14068,9 +14323,9 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { var outputs_extra_i = extra_i; for (outputs) |output| { - const extra_bytes = mem.sliceAsBytes(self.air.extra[extra_i..]); - const constraint = mem.sliceTo(mem.sliceAsBytes(self.air.extra[extra_i..]), 0); - const name = mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); + const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
extra_i += (constraint.len + name.len + (2 + 3)) / 4; @@ -14097,8 +14352,8 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const is_early_clobber = constraint[1] == '&'; const rest = constraint[@as(usize, 1) + @intFromBool(is_early_clobber) ..]; const arg_mcv: MCValue = arg_mcv: { - const arg_maybe_reg: ?Register = if (mem.eql(u8, rest, "r") or - mem.eql(u8, rest, "f") or mem.eql(u8, rest, "x")) + const arg_maybe_reg: ?Register = if (std.mem.eql(u8, rest, "r") or + std.mem.eql(u8, rest, "f") or std.mem.eql(u8, rest, "x")) registerAlias( self.register_manager.tryAllocReg(maybe_inst, switch (rest[0]) { 'r' => abi.RegisterClass.gp, @@ -14108,20 +14363,20 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { }) orelse return self.fail("ran out of registers lowering inline asm", .{}), @intCast(ty.abiSize(zcu)), ) - else if (mem.eql(u8, rest, "m")) + else if (std.mem.eql(u8, rest, "m")) if (output != .none) null else return self.fail( "memory constraint unsupported for asm result: '{s}'", .{constraint}, ) - else if (mem.eql(u8, rest, "g") or - mem.eql(u8, rest, "rm") or mem.eql(u8, rest, "mr") or - mem.eql(u8, rest, "r,m") or mem.eql(u8, rest, "m,r")) + else if (std.mem.eql(u8, rest, "g") or + std.mem.eql(u8, rest, "rm") or std.mem.eql(u8, rest, "mr") or + std.mem.eql(u8, rest, "r,m") or std.mem.eql(u8, rest, "m,r")) self.register_manager.tryAllocReg(maybe_inst, abi.RegisterClass.gp) orelse if (output != .none) null else return self.fail("ran out of registers lowering inline asm", .{}) - else if (mem.startsWith(u8, rest, "{") and mem.endsWith(u8, rest, "}")) + else if (std.mem.startsWith(u8, rest, "{") and std.mem.endsWith(u8, rest, "}")) parseRegName(rest["{".len .. rest.len - "}".len]) orelse return self.fail("invalid register constraint: '{s}'", .{constraint}) else if (rest.len == 1 and std.ascii.isDigit(rest[0])) { @@ -14134,7 +14389,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { break :arg_mcv if (arg_maybe_reg) |reg| .{ .register = reg } else arg: { const ptr_mcv = try self.resolveInst(output); switch (ptr_mcv) { - .immediate => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |_| + .immediate => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_| break :arg ptr_mcv.deref(), .register, .register_offset, .lea_frame => break :arg ptr_mcv.deref(), else => {}, @@ -14145,7 +14400,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| { _ = self.register_manager.lockReg(reg); }; - if (!mem.eql(u8, name, "_")) + if (!std.mem.eql(u8, name, "_")) arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len)); args.appendAssumeCapacity(arg_mcv); if (output == .none) result = arg_mcv; @@ -14153,17 +14408,17 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } for (inputs) |input| { - const input_bytes = mem.sliceAsBytes(self.air.extra[extra_i..]); - const constraint = mem.sliceTo(input_bytes, 0); - const name = mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); + const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); + const constraint = std.mem.sliceTo(input_bytes, 0); + const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
extra_i += (constraint.len + name.len + (2 + 3)) / 4; const ty = self.typeOf(input); const input_mcv = try self.resolveInst(input); - const arg_mcv: MCValue = if (mem.eql(u8, constraint, "r") or - mem.eql(u8, constraint, "f") or mem.eql(u8, constraint, "x")) + const arg_mcv: MCValue = if (std.mem.eql(u8, constraint, "r") or + std.mem.eql(u8, constraint, "f") or std.mem.eql(u8, constraint, "x")) arg: { const rc = switch (constraint[0]) { 'r' => abi.RegisterClass.gp, @@ -14177,16 +14432,16 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const reg = try self.register_manager.allocReg(null, rc); try self.genSetReg(reg, ty, input_mcv, .{}); break :arg .{ .register = registerAlias(reg, @intCast(ty.abiSize(zcu))) }; - } else if (mem.eql(u8, constraint, "i") or mem.eql(u8, constraint, "n")) + } else if (std.mem.eql(u8, constraint, "i") or std.mem.eql(u8, constraint, "n")) switch (input_mcv) { .immediate => |imm| .{ .immediate = imm }, else => return self.fail("immediate operand requires comptime value: '{s}'", .{ constraint, }), } - else if (mem.eql(u8, constraint, "m")) arg: { + else if (std.mem.eql(u8, constraint, "m")) arg: { switch (input_mcv) { - .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |_| + .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_| break :arg input_mcv, .indirect, .load_frame => break :arg input_mcv, .load_symbol, .load_direct, .load_got, .load_tlv => {}, @@ -14203,22 +14458,22 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { }; try self.genSetReg(addr_reg, Type.usize, input_mcv.address(), .{}); break :arg .{ .indirect = .{ .reg = addr_reg } }; - } else if (mem.eql(u8, constraint, "g") or - mem.eql(u8, constraint, "rm") or mem.eql(u8, constraint, "mr") or - mem.eql(u8, constraint, "r,m") or mem.eql(u8, constraint, "m,r")) + } else if (std.mem.eql(u8, constraint, "g") or + std.mem.eql(u8, constraint, "rm") or std.mem.eql(u8, constraint, "mr") or + std.mem.eql(u8, constraint, "r,m") or std.mem.eql(u8, constraint, "m,r")) arg: { switch (input_mcv) { .register, .indirect, .load_frame => break :arg input_mcv, - .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |_| + .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |_| break :arg input_mcv, else => {}, } const temp_mcv = try self.allocTempRegOrMem(ty, true); try self.genCopy(ty, temp_mcv, input_mcv, .{}); break :arg temp_mcv; - } else if (mem.eql(u8, constraint, "X")) + } else if (std.mem.eql(u8, constraint, "X")) input_mcv - else if (mem.startsWith(u8, constraint, "{") and mem.endsWith(u8, constraint, "}")) arg: { + else if (std.mem.startsWith(u8, constraint, "{") and std.mem.endsWith(u8, constraint, "}")) arg: { const reg = parseRegName(constraint["{".len .. 
constraint.len - "}".len]) orelse return self.fail("invalid register constraint: '{s}'", .{constraint}); try self.register_manager.getReg(reg, null); @@ -14233,7 +14488,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| { _ = self.register_manager.lockReg(reg); }; - if (!mem.eql(u8, name, "_")) + if (!std.mem.eql(u8, name, "_")) arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len)); args.appendAssumeCapacity(arg_mcv); } @@ -14241,7 +14496,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { { var clobber_i: u32 = 0; while (clobber_i < clobbers_len) : (clobber_i += 1) { - const clobber = mem.sliceTo(mem.sliceAsBytes(self.air.extra[extra_i..]), 0); + const clobber = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. extra_i += clobber.len / 4 + 1; @@ -14294,20 +14549,20 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { labels.deinit(self.gpa); } - const asm_source = mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len]; - var line_it = mem.tokenizeAny(u8, asm_source, "\n\r;"); + const asm_source = std.mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len]; + var line_it = std.mem.tokenizeAny(u8, asm_source, "\n\r;"); next_line: while (line_it.next()) |line| { - var mnem_it = mem.tokenizeAny(u8, line, " \t"); - var prefix: Instruction.Prefix = .none; + var mnem_it = std.mem.tokenizeAny(u8, line, " \t"); + var prefix: encoder.Instruction.Prefix = .none; const mnem_str = while (mnem_it.next()) |mnem_str| { if (mnem_str[0] == '#') continue :next_line; - if (mem.startsWith(u8, mnem_str, "//")) continue :next_line; - if (std.meta.stringToEnum(Instruction.Prefix, mnem_str)) |pre| { + if (std.mem.startsWith(u8, mnem_str, "//")) continue :next_line; + if (std.meta.stringToEnum(encoder.Instruction.Prefix, mnem_str)) |pre| { if (prefix != .none) return self.fail("extra prefix: '{s}'", .{mnem_str}); prefix = pre; continue; } - if (!mem.endsWith(u8, mnem_str, ":")) break mnem_str; + if (!std.mem.endsWith(u8, mnem_str, ":")) break mnem_str; const label_name = mnem_str[0 .. mnem_str.len - ":".len]; if (!Label.isValid(.definition, label_name)) return self.fail("invalid label: '{s}'", .{label_name}); @@ -14332,21 +14587,21 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { var mnem_size: ?Memory.Size = if (prefix == .directive) null - else if (mem.endsWith(u8, mnem_str, "b")) + else if (std.mem.endsWith(u8, mnem_str, "b")) .byte - else if (mem.endsWith(u8, mnem_str, "w")) + else if (std.mem.endsWith(u8, mnem_str, "w")) .word - else if (mem.endsWith(u8, mnem_str, "l")) + else if (std.mem.endsWith(u8, mnem_str, "l")) .dword - else if (mem.endsWith(u8, mnem_str, "q") and - (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !mem.endsWith(u8, mnem_str, "dq"))) + else if (std.mem.endsWith(u8, mnem_str, "q") and + (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !std.mem.endsWith(u8, mnem_str, "dq"))) .qword - else if (mem.endsWith(u8, mnem_str, "t")) + else if (std.mem.endsWith(u8, mnem_str, "t")) .tbyte else null; const mnem_tag = while (true) break std.meta.stringToEnum( - Instruction.Mnemonic, + encoder.Instruction.Mnemonic, mnem_str[0 .. 
mnem_str.len - @intFromBool(mnem_size != null)], ) orelse if (mnem_size) |_| { mnem_size = null; @@ -14367,18 +14622,18 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { .{ ._, .pseudo } else for (std.enums.values(Mir.Inst.Fixes)) |fixes| { const fixes_name = @tagName(fixes); - const space_i = mem.indexOfScalar(u8, fixes_name, ' '); + const space_i = std.mem.indexOfScalar(u8, fixes_name, ' '); const fixes_prefix = if (space_i) |i| - std.meta.stringToEnum(Instruction.Prefix, fixes_name[0..i]).? + std.meta.stringToEnum(encoder.Instruction.Prefix, fixes_name[0..i]).? else .none; if (fixes_prefix != prefix) continue; const pattern = fixes_name[if (space_i) |i| i + " ".len else 0..]; - const wildcard_i = mem.indexOfScalar(u8, pattern, '_').?; + const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; const mnem_prefix = pattern[0..wildcard_i]; const mnem_suffix = pattern[wildcard_i + "_".len ..]; - if (!mem.startsWith(u8, mnem_name, mnem_prefix)) continue; - if (!mem.endsWith(u8, mnem_name, mnem_suffix)) continue; + if (!std.mem.startsWith(u8, mnem_name, mnem_prefix)) continue; + if (!std.mem.endsWith(u8, mnem_name, mnem_suffix)) continue; break .{ fixes, std.meta.stringToEnum( Mir.Inst.Tag, mnem_name[mnem_prefix.len .. mnem_name.len - mnem_suffix.len], @@ -14400,21 +14655,21 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { var ops: [4]Operand = .{.none} ** 4; var last_op = false; - var op_it = mem.splitScalar(u8, mnem_it.rest(), ','); + var op_it = std.mem.splitScalar(u8, mnem_it.rest(), ','); next_op: for (&ops) |*op| { const op_str = while (!last_op) { const full_str = op_it.next() orelse break :next_op; - const code_str = if (mem.indexOfScalar(u8, full_str, '#') orelse - mem.indexOf(u8, full_str, "//")) |comment| + const code_str = if (std.mem.indexOfScalar(u8, full_str, '#') orelse + std.mem.indexOf(u8, full_str, "//")) |comment| code: { last_op = true; break :code full_str[0..comment]; } else full_str; - const trim_str = mem.trim(u8, code_str, " \t*"); + const trim_str = std.mem.trim(u8, code_str, " \t*"); if (trim_str.len > 0) break trim_str; } else break; - if (mem.startsWith(u8, op_str, "%%")) { - const colon = mem.indexOfScalarPos(u8, op_str, "%%".len + 2, ':'); + if (std.mem.startsWith(u8, op_str, "%%")) { + const colon = std.mem.indexOfScalarPos(u8, op_str, "%%".len + 2, ':'); const reg = parseRegName(op_str["%%".len .. colon orelse op_str.len]) orelse return self.fail("invalid register: '{s}'", .{op_str}); if (colon) |colon_pos| { @@ -14432,8 +14687,8 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.fail("invalid register size: '{s}'", .{op_str}); op.* = .{ .reg = reg }; } - } else if (mem.startsWith(u8, op_str, "%[") and mem.endsWith(u8, op_str, "]")) { - const colon = mem.indexOfScalarPos(u8, op_str, "%[".len, ':'); + } else if (std.mem.startsWith(u8, op_str, "%[") and std.mem.endsWith(u8, op_str, "]")) { + const colon = std.mem.indexOfScalarPos(u8, op_str, "%[".len, ':'); const modifier = if (colon) |colon_pos| op_str[colon_pos + ":".len .. op_str.len - "]".len] else @@ -14442,15 +14697,15 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { arg_map.get(op_str["%[".len .. 
colon orelse op_str.len - "]".len]) orelse return self.fail("no matching constraint: '{s}'", .{op_str}) ]) { - .immediate => |imm| if (mem.eql(u8, modifier, "") or mem.eql(u8, modifier, "c")) - .{ .imm = Immediate.u(imm) } + .immediate => |imm| if (std.mem.eql(u8, modifier, "") or std.mem.eql(u8, modifier, "c")) + .{ .imm = .u(imm) } else return self.fail("invalid modifier: '{s}'", .{modifier}), - .register => |reg| if (mem.eql(u8, modifier, "")) + .register => |reg| if (std.mem.eql(u8, modifier, "")) .{ .reg = reg } else return self.fail("invalid modifier: '{s}'", .{modifier}), - .memory => |addr| if (mem.eql(u8, modifier, "") or mem.eql(u8, modifier, "P")) + .memory => |addr| if (std.mem.eql(u8, modifier, "") or std.mem.eql(u8, modifier, "P")) .{ .mem = .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ @@ -14461,7 +14716,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } } else return self.fail("invalid modifier: '{s}'", .{modifier}), - .indirect => |reg_off| if (mem.eql(u8, modifier, "")) + .indirect => |reg_off| if (std.mem.eql(u8, modifier, "")) .{ .mem = .{ .base = .{ .reg = reg_off.reg }, .mod = .{ .rm = .{ @@ -14472,7 +14727,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } } else return self.fail("invalid modifier: '{s}'", .{modifier}), - .load_frame => |frame_addr| if (mem.eql(u8, modifier, "")) + .load_frame => |frame_addr| if (std.mem.eql(u8, modifier, "")) .{ .mem = .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ @@ -14483,42 +14738,42 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } } else return self.fail("invalid modifier: '{s}'", .{modifier}), - .lea_got => |sym_index| if (mem.eql(u8, modifier, "P")) + .lea_got => |sym_index| if (std.mem.eql(u8, modifier, "P")) .{ .reg = try self.copyToTmpRegister(Type.usize, .{ .lea_got = sym_index }) } else return self.fail("invalid modifier: '{s}'", .{modifier}), - .lea_symbol => |sym_off| if (mem.eql(u8, modifier, "P")) + .lea_symbol => |sym_off| if (std.mem.eql(u8, modifier, "P")) .{ .reg = try self.copyToTmpRegister(Type.usize, .{ .lea_symbol = sym_off }) } else return self.fail("invalid modifier: '{s}'", .{modifier}), else => return self.fail("invalid constraint: '{s}'", .{op_str}), }; - } else if (mem.startsWith(u8, op_str, "$")) { + } else if (std.mem.startsWith(u8, op_str, "$")) { if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| { if (mnem_size) |size| { - const max = @as(u64, math.maxInt(u64)) >> @intCast(64 - (size.bitSize() - 1)); + const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize() - 1)); if ((if (s < 0) ~s else s) > max) return self.fail("invalid immediate size: '{s}'", .{op_str}); } - op.* = .{ .imm = Immediate.s(s) }; + op.* = .{ .imm = .s(s) }; } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| { if (mnem_size) |size| { - const max = @as(u64, math.maxInt(u64)) >> @intCast(64 - size.bitSize()); + const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize()); if (u > max) return self.fail("invalid immediate size: '{s}'", .{op_str}); } - op.* = .{ .imm = Immediate.u(u) }; + op.* = .{ .imm = .u(u) }; } else |_| return self.fail("invalid immediate: '{s}'", .{op_str}); - } else if (mem.endsWith(u8, op_str, ")")) { - const open = mem.indexOfScalar(u8, op_str, '(') orelse + } else if (std.mem.endsWith(u8, op_str, ")")) { + const open = std.mem.indexOfScalar(u8, op_str, '(') orelse return self.fail("invalid operand: '{s}'", .{op_str}); - var sib_it = mem.splitScalar(u8, op_str[open + "(".len .. 
op_str.len - ")".len], ','); + var sib_it = std.mem.splitScalar(u8, op_str[open + "(".len .. op_str.len - ")".len], ','); const base_str = sib_it.next() orelse return self.fail("invalid memory operand: '{s}'", .{op_str}); - if (base_str.len > 0 and !mem.startsWith(u8, base_str, "%%")) + if (base_str.len > 0 and !std.mem.startsWith(u8, base_str, "%%")) return self.fail("invalid memory operand: '{s}'", .{op_str}); const index_str = sib_it.next() orelse ""; - if (index_str.len > 0 and !mem.startsWith(u8, base_str, "%%")) + if (index_str.len > 0 and !std.mem.startsWith(u8, base_str, "%%")) return self.fail("invalid memory operand: '{s}'", .{op_str}); const scale_str = sib_it.next() orelse ""; if (index_str.len == 0 and scale_str.len > 0) @@ -14550,10 +14805,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { else .none, .scale = scale, - .disp = if (mem.startsWith(u8, op_str[0..open], "%[") and - mem.endsWith(u8, op_str[0..open], "]")) + .disp = if (std.mem.startsWith(u8, op_str[0..open], "%[") and + std.mem.endsWith(u8, op_str[0..open], "]")) disp: { - const colon = mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':'); + const colon = std.mem.indexOfScalarPos(u8, op_str[0..open], "%[".len, ':'); const modifier = if (colon) |colon_pos| op_str[colon_pos + ":".len .. open - "]".len] else @@ -14562,9 +14817,9 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { arg_map.get(op_str["%[".len .. colon orelse open - "]".len]) orelse return self.fail("no matching constraint: '{s}'", .{op_str}) ]) { - .immediate => |imm| if (mem.eql(u8, modifier, "") or - mem.eql(u8, modifier, "c")) - math.cast(i32, @as(i64, @bitCast(imm))) orelse + .immediate => |imm| if (std.mem.eql(u8, modifier, "") or + std.mem.eql(u8, modifier, "c")) + std.math.cast(i32, @as(i64, @bitCast(imm))) orelse return self.fail("invalid displacement: '{s}'", .{op_str}) else return self.fail("invalid modifier: '{s}'", .{modifier}), @@ -14730,10 +14985,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.fail("undefined label: '{s}'", .{label.key_ptr.*}); for (outputs, args.items[0..outputs.len]) |output, arg_mcv| { - const extra_bytes = mem.sliceAsBytes(self.air.extra[outputs_extra_i..]); + const extra_bytes = std.mem.sliceAsBytes(self.air.extra[outputs_extra_i..]); const constraint = - mem.sliceTo(mem.sliceAsBytes(self.air.extra[outputs_extra_i..]), 0); - const name = mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); + std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[outputs_extra_i..]), 0); + const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); // This equation accounts for the fact that even if we have exactly 4 bytes // for the string, we still use the next u32 for the null terminator. 
outputs_extra_i += (constraint.len + name.len + (2 + 3)) / 4; @@ -14777,7 +15032,10 @@ const MoveStrategy = union(enum) { pub fn read(strat: MoveStrategy, self: *Self, dst_reg: Register, src_mem: Memory) !void { switch (strat) { - .move => |tag| try self.asmRegisterMemory(tag, dst_reg, src_mem), + .move => |tag| try self.asmRegisterMemory(tag, switch (tag[1]) { + else => dst_reg, + .lea => if (dst_reg.bitSize() >= 32) dst_reg else dst_reg.to32(), + }, src_mem), .x87_load_store => { try self.asmMemory(.{ .f_, .ld }, src_mem); assert(dst_reg != .st7); @@ -14787,14 +15045,14 @@ const MoveStrategy = union(enum) { ie.insert, dst_reg, src_mem, - Immediate.u(0), + .u(0), ), .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( ie.insert, dst_reg, dst_reg, src_mem, - Immediate.u(0), + .u(0), ), } } @@ -14809,7 +15067,7 @@ const MoveStrategy = union(enum) { ie.extract, dst_mem, src_reg, - Immediate.u(0), + .u(0), ), } } @@ -14823,7 +15081,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .mmx => {}, .sse => switch (ty.zigTypeTag(zcu)) { else => { - const classes = mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none); + const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none); assert(std.mem.indexOfNone(abi.Class, classes, &.{ .integer, .sse, .sseup, .memory, .float, .float_combine, }) == null); @@ -15135,7 +15393,7 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: Copy ), .memory, .load_symbol, .load_direct, .load_got, .load_tlv => { switch (dst_mcv) { - .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| + .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| return self.genSetMem(.{ .reg = .ds }, small_addr, ty, src_mcv, opts), .load_symbol, .load_direct, .load_got, .load_tlv => {}, else => unreachable, @@ -15179,17 +15437,17 @@ fn genSetReg( => unreachable, .undef => if (opts.safety) switch (dst_reg.class()) { .general_purpose => switch (abi_size) { - 1 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to8(), Immediate.u(0xAA)), - 2 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to16(), Immediate.u(0xAAAA)), + 1 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to8(), .u(0xAA)), + 2 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to16(), .u(0xAAAA)), 3...4 => try self.asmRegisterImmediate( .{ ._, .mov }, dst_reg.to32(), - Immediate.s(@as(i32, @bitCast(@as(u32, 0xAAAAAAAA)))), + .s(@as(i32, @bitCast(@as(u32, 0xAAAAAAAA)))), ), 5...8 => try self.asmRegisterImmediate( .{ ._, .mov }, dst_reg.to64(), - Immediate.u(0xAAAAAAAAAAAAAAAA), + .u(0xAAAAAAAAAAAAAAAA), ), else => unreachable, }, @@ -15203,20 +15461,20 @@ fn genSetReg( // register is the fastest way to zero a register. try self.spillEflagsIfOccupied(); try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()); - } else if (abi_size > 4 and math.cast(u32, imm) != null) { + } else if (abi_size > 4 and std.math.cast(u32, imm) != null) { // 32-bit moves zero-extend to 64-bit. 
- try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), Immediate.u(imm)); + try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), .u(imm)); } else if (abi_size <= 4 and @as(i64, @bitCast(imm)) < 0) { try self.asmRegisterImmediate( .{ ._, .mov }, registerAlias(dst_reg, abi_size), - Immediate.s(@intCast(@as(i64, @bitCast(imm)))), + .s(@intCast(@as(i64, @bitCast(imm)))), ); } else { try self.asmRegisterImmediate( .{ ._, .mov }, registerAlias(dst_reg, abi_size), - Immediate.u(imm), + .u(imm), ); } }, @@ -15325,15 +15583,15 @@ fn genSetReg( .load_frame => |frame_addr| try self.moveStrategy( ty, dst_reg.class(), - self.getFrameAddrAlignment(frame_addr).compare(.gte, Alignment.fromLog2Units( - math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)), + self.getFrameAddrAlignment(frame_addr).compare(.gte, InternPool.Alignment.fromLog2Units( + std.math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)), )), ), .lea_frame => .{ .move = .{ ._, .lea } }, else => unreachable, }).read(self, registerAlias(dst_reg, abi_size), switch (src_mcv) { .register_offset, .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, + .base = .{ .reg = reg_off.reg.to64() }, .mod = .{ .rm = .{ .size = self.memSize(ty), .disp = reg_off.off, @@ -15350,7 +15608,7 @@ fn genSetReg( }), .memory, .load_symbol, .load_direct, .load_got, .load_tlv => { switch (src_mcv) { - .memory => |addr| if (math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| + .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| return (try self.moveStrategy( ty, dst_reg.class(), @@ -15400,14 +15658,10 @@ fn genSetReg( const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); - try (try self.moveStrategy(ty, dst_reg.class(), false)).read( - self, - registerAlias(dst_reg, abi_size), - .{ - .base = .{ .reg = addr_reg }, - .mod = .{ .rm = .{ .size = self.memSize(ty) } }, - }, - ); + try (try self.moveStrategy(ty, dst_reg.class(), false)).read(self, registerAlias(dst_reg, abi_size), .{ + .base = .{ .reg = addr_reg.to64() }, + .mod = .{ .rm = .{ .size = self.memSize(ty) } }, + }); }, .lea_symbol => |sym_off| switch (self.bin_file.tag) { .elf, .macho => try self.asmRegisterMemory( @@ -15478,12 +15732,12 @@ fn genSetMem( ), .immediate => |imm| switch (abi_size) { 1, 2, 4 => { - const immediate = switch (if (ty.isAbiInt(zcu)) + const immediate: Immediate = switch (if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned) { - .signed => Immediate.s(@truncate(@as(i64, @bitCast(imm)))), - .unsigned => Immediate.u(@as(u32, @intCast(imm))), + .signed => .s(@truncate(@as(i64, @bitCast(imm)))), + .unsigned => .u(@as(u32, @intCast(imm))), }; try self.asmMemoryImmediate( .{ ._, .mov }, @@ -15495,14 +15749,14 @@ fn genSetMem( ); }, 3, 5...7 => unreachable, - else => if (math.cast(i32, @as(i64, @bitCast(imm)))) |small| { + else => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| { try self.asmMemoryImmediate( .{ ._, .mov }, .{ .base = base, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size), .disp = disp, } } }, - Immediate.s(small), + .s(small), ); } else { var offset: i32 = 0; @@ -15512,10 +15766,10 @@ fn genSetMem( .size = .dword, .disp = disp + offset, } } }, - if (ty.isSignedInt(zcu)) Immediate.s( - @truncate(@as(i64, @bitCast(imm)) >> (math.cast(u6, offset * 8) orelse 63)), - ) else Immediate.u( - @as(u32, @truncate(if (math.cast(u6, offset * 8)) |shift| imm >> shift else 0)), + if (ty.isSignedInt(zcu)) .s( + @truncate(@as(i64, 
@bitCast(imm)) >> (std.math.cast(u6, offset * 8) orelse 63)), + ) else .u( + @as(u32, @truncate(if (std.math.cast(u6, offset * 8)) |shift| imm >> shift else 0)), ), ); }, @@ -15542,7 +15796,9 @@ fn genSetMem( .general_purpose, .segment, .x87, .ip => @divExact(src_alias.bitSize(), 8), .mmx, .sse => abi_size, }); - const src_align = Alignment.fromNonzeroByteUnits(math.ceilPowerOfTwoAssert(u32, src_size)); + const src_align = InternPool.Alignment.fromNonzeroByteUnits( + std.math.ceilPowerOfTwoAssert(u32, src_size), + ); if (src_size > mem_size) { const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{ .size = src_size, @@ -15755,7 +16011,7 @@ fn genLazySymbolRef( .base = .{ .reloc = sym_index }, .mod = .{ .rm = .{ .size = .qword } }, }), - .call => try self.asmImmediate(.{ ._, .call }, Immediate.rel(.{ .sym_index = sym_index })), + .call => try self.asmImmediate(.{ ._, .call }, .rel(.{ .sym_index = sym_index })), else => unreachable, } } else if (self.bin_file.cast(.plan9)) |p9_file| { @@ -15861,7 +16117,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(switch (math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) { + try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) { .lt => dst_ty, .eq => if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty, .gt => src_ty, @@ -15878,7 +16134,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const bit_size = dst_ty.bitSize(zcu); if (abi_size * 8 <= bit_size or dst_ty.isVector(zcu)) break :result dst_mcv; - const dst_limbs_len = math.divCeil(i32, @intCast(bit_size), 64) catch unreachable; + const dst_limbs_len = std.math.divCeil(i32, @intCast(bit_size), 64) catch unreachable; const high_mcv: MCValue = switch (dst_mcv) { .register => |dst_reg| .{ .register = dst_reg }, .register_pair => |dst_regs| .{ .register = dst_regs[1] }, @@ -15940,7 +16196,7 @@ fn airFloatFromInt(self: *Self, inst: Air.Inst.Index) !void { const src_bits: u32 = @intCast(src_ty.bitSize(zcu)); const src_signedness = if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned; - const src_size = math.divCeil(u32, @max(switch (src_signedness) { + const src_size = std.math.divCeil(u32, @max(switch (src_signedness) { .signed => src_bits, .unsigned => src_bits + 1, }, 32), 8) catch unreachable; @@ -16017,7 +16273,7 @@ fn airIntFromFloat(self: *Self, inst: Air.Inst.Index) !void { const dst_bits: u32 = @intCast(dst_ty.bitSize(zcu)); const dst_signedness = if (dst_ty.isAbiInt(zcu)) dst_ty.intInfo(zcu).signedness else .unsigned; - const dst_size = math.divCeil(u32, @max(switch (dst_signedness) { + const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) { .signed => dst_bits, .unsigned => dst_bits + 1, }, 32), 8) catch unreachable; @@ -16593,13 +16849,17 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const reg_locks = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdi, .rsi, .rcx }); defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - const dst_ptr = try self.resolveInst(bin_op.lhs); - const dst_ptr_ty = self.typeOf(bin_op.lhs); - const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, + const dst = try 
self.resolveInst(bin_op.lhs); + const dst_ty = self.typeOf(bin_op.lhs); + const dst_locks: [2]?RegisterLock = switch (dst) { + .register => |dst_reg| .{ self.register_manager.lockRegAssumeUnused(dst_reg), null }, + .register_pair => |dst_regs| .{ + self.register_manager.lockRegAssumeUnused(dst_regs[0]), + self.register_manager.lockRegAssumeUnused(dst_regs[1]), + }, + else => .{ null, null }, }; - defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock); + for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); const src_val = try self.resolveInst(bin_op.rhs); const elem_ty = self.typeOf(bin_op.rhs); @@ -16612,16 +16872,20 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const elem_abi_size: u31 = @intCast(elem_ty.abiSize(zcu)); if (elem_abi_size == 1) { - const ptr: MCValue = switch (dst_ptr_ty.ptrSize(zcu)) { - // TODO: this only handles slices stored in the stack - .slice => dst_ptr, - .one => dst_ptr, + const dst_ptr: MCValue = switch (dst_ty.ptrSize(zcu)) { + .slice => switch (dst) { + .register_pair => |dst_regs| .{ .register = dst_regs[0] }, + else => dst, + }, + .one => dst, .c, .many => unreachable, }; - const len: MCValue = switch (dst_ptr_ty.ptrSize(zcu)) { - // TODO: this only handles slices stored in the stack - .slice => dst_ptr.address().offset(8).deref(), - .one => .{ .immediate = dst_ptr_ty.childType(zcu).arrayLen(zcu) }, + const len: MCValue = switch (dst_ty.ptrSize(zcu)) { + .slice => switch (dst) { + .register_pair => |dst_regs| .{ .register = dst_regs[1] }, + else => dst.address().offset(8).deref(), + }, + .one => .{ .immediate = dst_ty.childType(zcu).arrayLen(zcu) }, .c, .many => unreachable, }; const len_lock: ?RegisterLock = switch (len) { @@ -16630,20 +16894,25 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { }; defer if (len_lock) |lock| self.register_manager.unlockReg(lock); - try self.genInlineMemset(ptr, src_val, len, .{ .safety = safety }); + try self.genInlineMemset(dst_ptr, src_val, len, .{ .safety = safety }); break :result; } // Store the first element, and then rely on memcpy copying forwards. // Length zero requires a runtime check - so we handle arrays specially // here to elide it. - switch (dst_ptr_ty.ptrSize(zcu)) { + switch (dst_ty.ptrSize(zcu)) { .slice => { - const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(zcu); + const slice_ptr_ty = dst_ty.slicePtrFieldType(zcu); - // TODO: this only handles slices stored in the stack - const ptr = dst_ptr; - const len = dst_ptr.address().offset(8).deref(); + const dst_ptr: MCValue = switch (dst) { + .register_pair => |dst_regs| .{ .register = dst_regs[0] }, + else => dst, + }; + const len: MCValue = switch (dst) { + .register_pair => |dst_regs| .{ .register = dst_regs[1] }, + else => dst.address().offset(8).deref(), + }; // Used to store the number of elements for comparison. // After comparison, updated to store number of bytes needed to copy. 
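The airMemset and airMemcpy hunks in this region drop the old "this only handles slices stored in the stack" limitation by also accepting a slice operand held in a register pair: limb 0 carries the pointer and limb 1 the length, so the length no longer has to be loaded via address().offset(8).deref(). A simplified sketch of that split follows; the MCValue union below and the slicePtr/sliceLen helpers are illustrative stand-ins, not the backend's real types.

const std = @import("std");

// Simplified stand-in for the backend's MCValue; only the cases used below.
const MCValue = union(enum) {
    register: u8, // register number
    register_pair: [2]u8, // (ptr, len) of a slice split across two registers
    load_frame: i32, // frame offset of a slice spilled to the stack
};

fn slicePtr(slice: MCValue) MCValue {
    return switch (slice) {
        .register_pair => |regs| .{ .register = regs[0] },
        else => slice,
    };
}

fn sliceLen(slice: MCValue) MCValue {
    return switch (slice) {
        .register_pair => |regs| .{ .register = regs[1] },
        // A spilled slice keeps its length 8 bytes past the pointer.
        .load_frame => |off| .{ .load_frame = off + 8 },
        else => slice,
    };
}

test "slice in a register pair splits without a stack load" {
    const slice: MCValue = .{ .register_pair = .{ 3, 4 } };
    try std.testing.expectEqual(MCValue{ .register = 3 }, slicePtr(slice));
    try std.testing.expectEqual(MCValue{ .register = 4 }, sliceLen(slice));
}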
@@ -16656,38 +16925,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { try self.asmRegisterRegister(.{ ._, .@"test" }, len_reg, len_reg); const skip_reloc = try self.asmJccReloc(.z, undefined); - try self.store(slice_ptr_ty, ptr, src_val, .{ .safety = safety }); - - const second_elem_ptr_reg = - try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; - const second_elem_ptr_lock = - self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); - defer self.register_manager.unlockReg(second_elem_ptr_lock); - - try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ - .reg = try self.copyToTmpRegister(Type.usize, ptr), - .off = elem_abi_size, - } }, .{}); - - try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 }); - try self.asmRegisterRegisterImmediate( - .{ .i_, .mul }, - len_reg, - len_reg, - Immediate.s(elem_abi_size), - ); - try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv); - - self.performReloc(skip_reloc); - }, - .one => { - const elem_ptr_ty = try pt.singleMutPtrType(elem_ty); - - const len = dst_ptr_ty.childType(zcu).arrayLen(zcu); - - assert(len != 0); // prevented by Sema - try self.store(elem_ptr_ty, dst_ptr, src_val, .{ .safety = safety }); + try self.store(slice_ptr_ty, dst_ptr, src_val, .{ .safety = safety }); const second_elem_ptr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); @@ -16701,8 +16939,39 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { .off = elem_abi_size, } }, .{}); + try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 }); + try self.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + len_reg, + len_reg, + .s(elem_abi_size), + ); + try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, len_mcv); + + self.performReloc(skip_reloc); + }, + .one => { + const elem_ptr_ty = try pt.singleMutPtrType(elem_ty); + + const len = dst_ty.childType(zcu).arrayLen(zcu); + + assert(len != 0); // prevented by Sema + try self.store(elem_ptr_ty, dst, src_val, .{ .safety = safety }); + + const second_elem_ptr_reg = + try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; + const second_elem_ptr_lock = + self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); + defer self.register_manager.unlockReg(second_elem_ptr_lock); + + try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ + .reg = try self.copyToTmpRegister(Type.usize, dst), + .off = elem_abi_size, + } }, .{}); + const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) }; - try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy); + try self.genInlineMemcpy(second_elem_ptr_mcv, dst, bytes_to_copy); }, .c, .many => unreachable, } @@ -16719,48 +16988,72 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx }); defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - const dst_ptr = try self.resolveInst(bin_op.lhs); - const dst_ptr_ty = self.typeOf(bin_op.lhs); - const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, + const dst = try self.resolveInst(bin_op.lhs); + const dst_ty = self.typeOf(bin_op.lhs); + const dst_locks: [2]?RegisterLock = switch (dst) { + .register => 
|dst_reg| .{ self.register_manager.lockRegAssumeUnused(dst_reg), null }, + .register_pair => |dst_regs| .{ + self.register_manager.lockRegAssumeUnused(dst_regs[0]), + self.register_manager.lockReg(dst_regs[1]), + }, + else => .{ null, null }, }; - defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock); + for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const src_ptr = try self.resolveInst(bin_op.rhs); - const src_ptr_lock: ?RegisterLock = switch (src_ptr) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, + const src = try self.resolveInst(bin_op.rhs); + const src_locks: [2]?RegisterLock = switch (src) { + .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null }, + .register_pair => |src_regs| .{ + self.register_manager.lockRegAssumeUnused(src_regs[0]), + self.register_manager.lockRegAssumeUnused(src_regs[1]), + }, + else => .{ null, null }, }; - defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock); + for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock); - const len: MCValue = switch (dst_ptr_ty.ptrSize(zcu)) { + const len: MCValue = switch (dst_ty.ptrSize(zcu)) { .slice => len: { const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); const len_lock = self.register_manager.lockRegAssumeUnused(len_reg); defer self.register_manager.unlockReg(len_lock); - try self.asmRegisterMemoryImmediate( - .{ .i_, .mul }, - len_reg, - try dst_ptr.address().offset(8).deref().mem(self, .qword), - Immediate.s(@intCast(dst_ptr_ty.childType(zcu).abiSize(zcu))), - ); + switch (dst) { + .register_pair => |dst_regs| try self.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + len_reg, + dst_regs[1], + .s(@intCast(dst_ty.childType(zcu).abiSize(zcu))), + ), + else => try self.asmRegisterMemoryImmediate( + .{ .i_, .mul }, + len_reg, + try dst.address().offset(8).deref().mem(self, .qword), + .s(@intCast(dst_ty.childType(zcu).abiSize(zcu))), + ), + } break :len .{ .register = len_reg }; }, .one => len: { - const array_ty = dst_ptr_ty.childType(zcu); + const array_ty = dst_ty.childType(zcu); break :len .{ .immediate = array_ty.arrayLen(zcu) * array_ty.childType(zcu).abiSize(zcu) }; }, .c, .many => unreachable, }; const len_lock: ?RegisterLock = switch (len) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + .register => |reg| self.register_manager.lockReg(reg), else => null, }; defer if (len_lock) |lock| self.register_manager.unlockReg(lock); - // TODO: dst_ptr and src_ptr could be slices rather than raw pointers + const dst_ptr: MCValue = switch (dst) { + .register_pair => |dst_regs| .{ .register = dst_regs[0] }, + else => dst, + }; + const src_ptr: MCValue = switch (src) { + .register_pair => |src_regs| .{ .register = src_regs[0] }, + else => src, + }; + try self.genInlineMemcpy(dst_ptr, src_ptr, len); return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -16930,27 +17223,23 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { try self.genSetReg( regs[1], vector_ty, - .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(64 - vector_len) }, + .{ .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - vector_len) }, .{}, ); const src_mcv = try self.resolveInst(ty_op.operand); - const abi_size = @max(math.divCeil(u32, vector_len, 8) catch unreachable, 4); + const abi_size = @max(std.math.divCeil(u32, vector_len, 8) catch unreachable, 4); try self.asmCmovccRegisterRegister( switch 
(src_mcv) { .eflags => |cc| cc, .register => |src_reg| cc: { - try self.asmRegisterImmediate( - .{ ._, .@"test" }, - src_reg.to8(), - Immediate.u(1), - ); + try self.asmRegisterImmediate(.{ ._, .@"test" }, src_reg.to8(), .u(1)); break :cc .nz; }, else => cc: { try self.asmMemoryImmediate( .{ ._, .@"test" }, try src_mcv.mem(self, .byte), - Immediate.u(1), + .u(1), ); break :cc .nz; }, @@ -17037,7 +17326,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl }, dst_alias, dst_alias, - Immediate.u(0b00_00_00_00), + .u(0b00_00_00_00), ); if (switch (scalar_bits) { 1...8 => vector_len > 4, @@ -17049,7 +17338,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { .{ if (self.hasFeature(.avx)) .vp_d else .p_d, .shuf }, dst_alias, dst_alias, - Immediate.u(if (scalar_bits <= 64) 0b00_00_00_00 else 0b01_00_01_00), + .u(if (scalar_bits <= 64) 0b00_00_00_00 else 0b01_00_01_00), ); break :result .{ .register = dst_reg }; }, @@ -17080,7 +17369,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { dst_reg.to128(), src_reg.to128(), src_reg.to128(), - Immediate.u(0), + .u(0), ); } break :result .{ .register = dst_reg }; @@ -17095,7 +17384,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { .{ ._ps, .shuf }, dst_reg.to128(), dst_reg.to128(), - Immediate.u(0), + .u(0), ); break :result dst_mcv; } @@ -17122,14 +17411,14 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { dst_reg.to128(), src_reg.to128(), src_reg.to128(), - Immediate.u(0), + .u(0), ); try self.asmRegisterRegisterRegisterImmediate( .{ .v_f128, .insert }, dst_reg.to256(), dst_reg.to256(), dst_reg.to128(), - Immediate.u(1), + .u(1), ); } } @@ -17198,7 +17487,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { dst_reg.to256(), dst_reg.to256(), dst_reg.to128(), - Immediate.u(1), + .u(1), ); } } @@ -17231,7 +17520,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { dst_reg.to256(), src_reg.to256(), src_reg.to128(), - Immediate.u(1), + .u(1), ); } break :result .{ .register = dst_reg }; @@ -17308,7 +17597,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { mask_alias, mask_alias, mask_reg.to128(), - Immediate.u(1), + .u(1), ); break :broadcast; }, @@ -17362,7 +17651,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { .{ if (has_avx) .vp_w else .p_w, .shufl }, mask_alias, mask_alias, - Immediate.u(0b00_00_00_00), + .u(0b00_00_00_00), ); if (abi_size <= 8) break :broadcast; } @@ -17370,7 +17659,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { .{ if (has_avx) .vp_d else .p_d, .shuf }, mask_alias, mask_alias, - Immediate.u(switch (elem_abi_size) { + .u(switch (elem_abi_size) { 1...2, 5...8 => 0b01_00_01_00, 3...4 => 0b00_00_00_00, else => unreachable, @@ -17649,7 +17938,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { const mask_elem = maybe_mask_elem orelse continue; const mask_elem_index = - math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck; + std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck; const elem_byte = (elem_index >> 1) * elem_abi_size; if (mask_elem_index * elem_abi_size != (elem_byte & 0b0111) | @as(u4, switch (variant) { .unpckl => 0b0000, @@ -17746,10 +18035,10 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { } else sources[(elem_index & 0b010) >> 1] = source; const select_bit: u3 = @intCast((elem_index & 0b011) << 1); - const select = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; + const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; if (elem_index & 0b100 == 0) - control |= select - else if (control & @as(u8, 0b11) << select_bit != select) break :pshufd; + control |= select_mask + else if (control & @as(u8, 0b11) << select_bit != select_mask) break :pshufd; } const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; @@ -17767,7 +18056,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { .{ if (has_avx) .vp_d else .p_d, .shuf }, dst_alias, try src_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), - Immediate.u(control), + .u(control), ) else try self.asmRegisterRegisterImmediate( .{ if (has_avx) .vp_d else .p_d, .shuf }, dst_alias, @@ -17775,7 +18064,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? else try self.copyToTmpRegister(operand_tys[sources[0].?], src_mcv), max_abi_size), - Immediate.u(control), + .u(control), ); break :result .{ .register = dst_reg }; } @@ -17797,10 +18086,10 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { } else sources[(elem_index & 0b010) >> 1] = source; const select_bit: u3 = @intCast((elem_index & 0b011) << 1); - const select = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; + const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; if (elem_index & 0b100 == 0) - control |= select - else if (control & @as(u8, 0b11) << select_bit != select) break :shufps; + control |= select_mask + else if (control & @as(u8, 0b11) << select_bit != select_mask) break :shufps; } if (sources[0] orelse break :shufps == sources[1] orelse break :shufps) break :shufps; @@ -17824,7 +18113,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), - Immediate.u(control), + .u(control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .v_ps, .shuf }, dst_alias, @@ -17833,12 +18122,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? else try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - Immediate.u(control), + .u(control), ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ ._ps, .shuf }, dst_alias, try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), - Immediate.u(control), + .u(control), ) else try self.asmRegisterRegisterImmediate( .{ ._ps, .shuf }, dst_alias, @@ -17846,7 +18135,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? 
else try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - Immediate.u(control), + .u(control), ); break :result dst_mcv; } @@ -17891,7 +18180,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), - Immediate.u(control), + .u(control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .v_pd, .shuf }, dst_alias, @@ -17900,12 +18189,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? else try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - Immediate.u(control), + .u(control), ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ ._pd, .shuf }, dst_alias, try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), - Immediate.u(control), + .u(control), ) else try self.asmRegisterRegisterImmediate( .{ ._pd, .shuf }, dst_alias, @@ -17913,7 +18202,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? else try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - Immediate.u(control), + .u(control), ); break :result dst_mcv; } @@ -17927,13 +18216,13 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { for (mask_elems, 0..) |maybe_mask_elem, elem_index| { const mask_elem = maybe_mask_elem orelse continue; const mask_elem_index = - math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend; + std.math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend; if (mask_elem_index != elem_index) break :blend; - const select = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index); + const select_mask = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index); if (elem_index & 0b1000 == 0) - control |= select - else if (control & @as(u8, 0b1) << @truncate(elem_index) != select) break :blend; + control |= select_mask + else if (control & @as(u8, 0b1) << @truncate(elem_index) != select_mask) break :blend; } if (!elem_ty.isRuntimeFloat() and self.hasFeature(.avx2)) vpblendd: { @@ -17961,7 +18250,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { registerAlias(dst_reg, dst_abi_size), registerAlias(lhs_reg, dst_abi_size), try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), - Immediate.u(expanded_control), + .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .vp_d, .blend }, registerAlias(dst_reg, dst_abi_size), @@ -17970,7 +18259,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? else try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - Immediate.u(expanded_control), + .u(expanded_control), ); break :result .{ .register = dst_reg }; } @@ -18016,7 +18305,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { else dst_reg, dst_abi_size), try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), - Immediate.u(expanded_control), + .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .vp_w, .blend }, registerAlias(dst_reg, dst_abi_size), @@ -18028,12 +18317,12 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? 
else try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - Immediate.u(expanded_control), + .u(expanded_control), ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ .p_w, .blend }, registerAlias(dst_reg, dst_abi_size), try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), - Immediate.u(expanded_control), + .u(expanded_control), ) else try self.asmRegisterRegisterImmediate( .{ .p_w, .blend }, registerAlias(dst_reg, dst_abi_size), @@ -18041,7 +18330,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? else try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - Immediate.u(expanded_control), + .u(expanded_control), ); break :result .{ .register = dst_reg }; } @@ -18077,7 +18366,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { else dst_reg, dst_abi_size), try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), - Immediate.u(expanded_control), + .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( switch (elem_abi_size) { 4 => .{ .v_ps, .blend }, @@ -18093,7 +18382,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? else try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - Immediate.u(expanded_control), + .u(expanded_control), ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( switch (elem_abi_size) { 4 => .{ ._ps, .blend }, @@ -18102,7 +18391,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { }, registerAlias(dst_reg, dst_abi_size), try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), - Immediate.u(expanded_control), + .u(expanded_control), ) else try self.asmRegisterRegisterImmediate( switch (elem_abi_size) { 4 => .{ ._ps, .blend }, @@ -18114,7 +18403,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? 
else try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), - Immediate.u(expanded_control), + .u(expanded_control), ); break :result .{ .register = dst_reg }; } @@ -18138,7 +18427,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) |*select_mask_elem, maybe_mask_elem, elem_index| { const mask_elem = maybe_mask_elem orelse continue; const mask_elem_index = - math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv; + std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv; if (mask_elem_index != elem_index) break :blendv; select_mask_elem.* = (if (mask_elem < 0) @@ -18380,7 +18669,9 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { break :result null; }) orelse return self.fail("TODO implement airShuffle from {} and {} to {} with {}", .{ - lhs_ty.fmt(pt), rhs_ty.fmt(pt), dst_ty.fmt(pt), + lhs_ty.fmt(pt), + rhs_ty.fmt(pt), + dst_ty.fmt(pt), Value.fromInterned(extra.mask).fmtValue(pt), }); return self.finishAir(inst, result, .{ extra.a, extra.b, .none }); @@ -18397,7 +18688,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { try self.spillEflagsIfOccupied(); const operand_mcv = try self.resolveInst(reduce.operand); - const mask_len = (math.cast(u6, operand_ty.vectorLen(zcu)) orelse + const mask_len = (std.math.cast(u6, operand_ty.vectorLen(zcu)) orelse return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)})); const mask = (@as(u64, 1) << mask_len) - 1; const abi_size: u32 = @intCast(operand_ty.abiSize(zcu)); @@ -18406,7 +18697,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { if (operand_mcv.isMemory()) try self.asmMemoryImmediate( .{ ._, .@"test" }, try operand_mcv.mem(self, Memory.Size.fromSize(abi_size)), - Immediate.u(mask), + .u(mask), ) else { const operand_reg = registerAlias(if (operand_mcv.isRegister()) operand_mcv.getReg().? 
@@ -18415,7 +18706,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { if (mask_len < abi_size * 8) try self.asmRegisterImmediate( .{ ._, .@"test" }, operand_reg, - Immediate.u(mask), + .u(mask), ) else try self.asmRegisterRegister( .{ ._, .@"test" }, operand_reg, @@ -18431,7 +18722,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegister(.{ ._, .not }, tmp_reg); if (mask_len < abi_size * 8) - try self.asmRegisterImmediate(.{ ._, .@"test" }, tmp_reg, Immediate.u(mask)) + try self.asmRegisterImmediate(.{ ._, .@"test" }, tmp_reg, .u(mask)) else try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_reg, tmp_reg); break :result .{ .eflags = .z }; @@ -18579,12 +18870,12 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterImmediate( .{ ._, .@"and" }, registerAlias(elem_reg, @min(result_size, 4)), - Immediate.u(1), + .u(1), ); if (elem_i > 0) try self.asmRegisterImmediate( .{ ._l, .sh }, registerAlias(elem_reg, result_size), - Immediate.u(@intCast(elem_i)), + .u(@intCast(elem_i)), ); try self.asmRegisterRegister( .{ ._, .@"or" }, @@ -18748,8 +19039,8 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { lock.* = self.register_manager.lockRegAssumeUnused(reg); } - const mir_tag = @as(?Mir.Inst.FixedTag, if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or - mem.eql(u2, &order, &.{ 3, 1, 2 })) + const mir_tag = @as(?Mir.Inst.FixedTag, if (std.mem.eql(u2, &order, &.{ 1, 3, 2 }) or + std.mem.eql(u2, &order, &.{ 3, 1, 2 })) switch (ty.zigTypeTag(zcu)) { .float => switch (ty.floatBits(self.target.*)) { 32 => .{ .v_ss, .fmadd132 }, @@ -18776,7 +19067,7 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { }, else => unreachable, } - else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) + else if (std.mem.eql(u2, &order, &.{ 2, 1, 3 }) or std.mem.eql(u2, &order, &.{ 1, 2, 3 })) switch (ty.zigTypeTag(zcu)) { .float => switch (ty.floatBits(self.target.*)) { 32 => .{ .v_ss, .fmadd213 }, @@ -18803,7 +19094,7 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { }, else => unreachable, } - else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) + else if (std.mem.eql(u2, &order, &.{ 2, 3, 1 }) or std.mem.eql(u2, &order, &.{ 3, 2, 1 })) switch (ty.zigTypeTag(zcu)) { .float => switch (ty.floatBits(self.target.*)) { 32 => .{ .v_ss, .fmadd231 }, @@ -18953,13 +19244,13 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void { const overflow_arg_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 8 } }; const reg_save_area: MCValue = .{ .indirect = .{ .reg = ptr_arg_list_reg, .off = 16 } }; - const classes = mem.sliceTo(&abi.classifySystemV(promote_ty, zcu, self.target.*, .arg), .none); + const classes = std.mem.sliceTo(&abi.classifySystemV(promote_ty, zcu, self.target.*, .arg), .none); switch (classes[0]) { .integer => { assert(classes.len == 1); try self.genSetReg(offset_reg, Type.c_uint, gp_offset, .{}); - try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, Immediate.u( + try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, .u( abi.SysV.c_abi_int_param_regs.len * 8, )); const mem_reloc = try self.asmJccReloc(.ae, undefined); @@ -19007,7 +19298,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void { assert(classes.len == 1); try self.genSetReg(offset_reg, Type.c_uint, fp_offset, .{}); - try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, Immediate.u( + try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, .u( abi.SysV.c_abi_int_param_regs.len * 8 + 
abi.SysV.c_abi_sse_param_regs.len * 16, )); const mem_reloc = try self.asmJccReloc(.ae, undefined); @@ -19055,9 +19346,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void { assert(classes.len == 1); unreachable; }, - else => return self.fail("TODO implement c_va_arg for {} on SysV", .{ - promote_ty.fmt(pt), - }), + else => return self.fail("TODO implement c_va_arg for {} on SysV", .{promote_ty.fmt(pt)}), } if (unused) break :result .unreach; @@ -19194,7 +19483,7 @@ fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCV .immediate => |imm| { // This immediate is unsigned. const U = std.meta.Int(.unsigned, ti.bits - @intFromBool(ti.signedness == .signed)); - if (imm >= math.maxInt(U)) { + if (imm >= std.math.maxInt(U)) { return MCValue{ .register = try self.copyToTmpRegister(Type.usize, mcv) }; } }, @@ -19226,7 +19515,7 @@ const CallMCValues = struct { args: []MCValue, return_value: InstTracking, stack_byte_count: u31, - stack_align: Alignment, + stack_align: InternPool.Alignment, gp_count: u32, fp_count: u32, @@ -19303,7 +19592,7 @@ fn resolveCallingConventionValues( var ret_tracking_i: usize = 0; const classes = switch (resolved_cc) { - .x86_64_sysv => mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target.*, .ret), .none), + .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target.*, .ret), .none), .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu)}, else => unreachable, }; @@ -19378,7 +19667,7 @@ fn resolveCallingConventionValues( var arg_mcv_i: usize = 0; const classes = switch (resolved_cc) { - .x86_64_sysv => mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .arg), .none), + .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .arg), .none), .x86_64_win => &.{abi.classifyWindows(ty, zcu)}, else => unreachable, }; @@ -19435,7 +19724,7 @@ fn resolveCallingConventionValues( const frame_elem_align = 8; const frame_elems_len = ty.vectorLen(zcu) - remaining_param_int_regs; - const frame_elem_size = mem.alignForward( + const frame_elem_size = std.mem.alignForward( u64, ty.childType(zcu).abiSize(zcu), frame_elem_align, @@ -19443,7 +19732,7 @@ fn resolveCallingConventionValues( const frame_size: u31 = @intCast(frame_elems_len * frame_elem_size); result.stack_byte_count = - mem.alignForward(u31, result.stack_byte_count, frame_elem_align); + std.mem.alignForward(u31, result.stack_byte_count, frame_elem_align); arg_mcv[arg_mcv_i] = .{ .elementwise_regs_then_frame = .{ .regs = remaining_param_int_regs, .frame_off = @intCast(result.stack_byte_count), @@ -19461,19 +19750,14 @@ fn resolveCallingConventionValues( continue; } - const param_size: u31 = @intCast(ty.abiSize(zcu)); const param_align = ty.abiAlignment(zcu).max(.@"8"); - result.stack_byte_count = mem.alignForward( - u31, - result.stack_byte_count, - @intCast(param_align.toByteUnits().?), - ); + result.stack_byte_count = @intCast(param_align.forward(result.stack_byte_count)); result.stack_align = result.stack_align.max(param_align); arg.* = .{ .load_frame = .{ .index = stack_frame_base, .off = result.stack_byte_count, } }; - result.stack_byte_count += param_size; + result.stack_byte_count += @intCast(ty.abiSize(zcu)); } assert(param_int_reg_i <= 6); result.gp_count = param_int_reg_i; @@ -19509,19 +19793,14 @@ fn resolveCallingConventionValues( arg.* = .none; continue; } - const param_size: u31 = @intCast(ty.abiSize(zcu)); const param_align = ty.abiAlignment(zcu); - result.stack_byte_count = mem.alignForward( - u31, - result.stack_byte_count, - 
@intCast(param_align.toByteUnits().?), - ); + result.stack_byte_count = @intCast(param_align.forward(result.stack_byte_count)); result.stack_align = result.stack_align.max(param_align); arg.* = .{ .load_frame = .{ .index = stack_frame_base, .off = result.stack_byte_count, } }; - result.stack_byte_count += param_size; + result.stack_byte_count += @intCast(ty.abiSize(zcu)); } }, else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}), @@ -19541,7 +19820,7 @@ fn fail(self: *Self, comptime format: []const u8, args: anytype) error{ OutOfMem return error.CodegenFail; } -fn failMsg(self: *Self, msg: *ErrorMsg) error{ OutOfMemory, CodegenFail } { +fn failMsg(self: *Self, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } { @branchHint(.cold); const zcu = self.pt.zcu; switch (self.owner) { @@ -19603,8 +19882,7 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { } fn memSize(self: *Self, ty: Type) Memory.Size { - const pt = self.pt; - const zcu = pt.zcu; + const zcu = self.pt.zcu; return switch (ty.zigTypeTag(zcu)) { .float => Memory.Size.fromBitSize(ty.floatBits(self.target.*)), else => Memory.Size.fromSize(@intCast(ty.abiSize(zcu))), @@ -19614,7 +19892,7 @@ fn memSize(self: *Self, ty: Type) Memory.Size { fn splitType(self: *Self, ty: Type) ![2]Type { const pt = self.pt; const zcu = pt.zcu; - const classes = mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none); + const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none); var parts: [2]Type = undefined; if (classes.len == 2) for (&parts, classes, 0..) |*part, class, part_i| { part.* = switch (class) { @@ -19648,7 +19926,7 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)), }; - const shift = math.cast(u6, 64 - int_info.bits % 64) orelse return; + const shift = std.math.cast(u6, 64 - int_info.bits % 64) orelse return; try self.spillEflagsIfOccupied(); switch (int_info.signedness) { .signed => { @@ -19690,8 +19968,7 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { } fn regBitSize(self: *Self, ty: Type) u64 { - const pt = self.pt; - const zcu = pt.zcu; + const zcu = self.pt.zcu; const abi_size = ty.abiSize(zcu); return switch (ty.zigTypeTag(zcu)) { else => switch (abi_size) { @@ -19713,14 +19990,14 @@ fn regExtraBits(self: *Self, ty: Type) u64 { return self.regBitSize(ty) - ty.bitSize(self.pt.zcu); } -fn hasFeature(self: *Self, feature: Target.x86.Feature) bool { - return Target.x86.featureSetHas(self.target.cpu.features, feature); +fn hasFeature(self: *Self, feature: std.Target.x86.Feature) bool { + return std.Target.x86.featureSetHas(self.target.cpu.features, feature); } fn hasAnyFeatures(self: *Self, features: anytype) bool { - return Target.x86.featureSetHasAny(self.target.cpu.features, features); + return std.Target.x86.featureSetHasAny(self.target.cpu.features, features); } fn hasAllFeatures(self: *Self, features: anytype) bool { - return Target.x86.featureSetHasAll(self.target.cpu.features, features); + return std.Target.x86.featureSetHasAll(self.target.cpu.features, features); } fn typeOf(self: *Self, inst: Air.Inst.Ref) Type { @@ -19732,9 +20009,13 @@ fn typeOf(self: *Self, inst: Air.Inst.Ref) Type { fn typeOfIndex(self: *Self, inst: Air.Inst.Index) Type { const pt = self.pt; const zcu = pt.zcu; - return switch (self.air.instructions.items(.tag)[@intFromEnum(inst)]) { - .loop_switch_br => 
self.typeOf(self.air.unwrapSwitch(inst).operand), - else => self.air.typeOfIndex(inst, &zcu.intern_pool), + const temp: Temp = .{ .index = inst }; + return switch (temp.unwrap(self)) { + .ref => switch (self.air.instructions.items(.tag)[@intFromEnum(inst)]) { + .loop_switch_br => self.typeOf(self.air.unwrapSwitch(inst).operand), + else => self.air.typeOfIndex(inst, &zcu.intern_pool), + }, + .temp => temp.typeOf(self), }; } @@ -19815,3 +20096,613 @@ fn promoteVarArg(self: *Self, ty: Type) Type { }, } } + +// ====================================== rewrite starts here ====================================== + +const Temp = struct { + index: Air.Inst.Index, + + fn unwrap(temp: Temp, self: *Self) union(enum) { + ref: Air.Inst.Ref, + temp: Index, + } { + switch (temp.index.unwrap()) { + .ref => |ref| return .{ .ref = ref }, + .target => |target_index| { + const temp_index: Index = @enumFromInt(target_index); + assert(temp_index.isValid(self)); + return .{ .temp = temp_index }; + }, + } + } + + fn typeOf(temp: Temp, self: *Self) Type { + return switch (temp.unwrap(self)) { + .ref => |ref| self.typeOf(ref), + .temp => |temp_index| temp_index.typeOf(self), + }; + } + + fn isMut(temp: Temp, self: *Self) bool { + return temp.unwrap(self) == .temp; + } + + fn tracking(temp: Temp, self: *Self) InstTracking { + return self.inst_tracking.get(temp.index).?; + } + + fn getOffset(temp: Temp, off: i32, self: *Self) !Temp { + const new_temp_index = self.next_temp_index; + self.temp_type[@intFromEnum(new_temp_index)] = Type.usize; + self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + switch (temp.tracking(self).short) { + else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + .register => |reg| { + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ + .base = .{ .reg = reg.to64() }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = off, + } }, + }); + }, + .register_offset => |reg_off| { + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ + .base = .{ .reg = reg_off.reg.to64() }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = reg_off.off + off, + } }, + }); + }, + .lea_symbol => |sym_off| new_temp_index.tracking(self).* = InstTracking.init(.{ .lea_symbol = .{ + .sym_index = sym_off.sym_index, + .off = sym_off.off + off, + } }), + .load_frame => |frame_addr| { + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register_offset = .{ + .reg = new_reg, + .off = off, + } }); + try self.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ + .base = .{ .frame = frame_addr.index }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = frame_addr.off, + } }, + }); + }, + .lea_frame => |frame_addr| new_temp_index.tracking(self).* = InstTracking.init(.{ .lea_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + off, + } }), + } + return .{ .index = new_temp_index.toIndex() }; + } + + fn toOffset(temp: *Temp, off: i32, self: *Self) !void { + if (off == 0) return; + switch (temp.unwrap(self)) { + .ref => {}, + .temp => |temp_index| { + const temp_tracking = temp_index.tracking(self); + 
switch (temp_tracking.short) { + else => {}, + .register => |reg| { + try self.freeValue(temp_tracking.long); + temp_tracking.* = InstTracking.init(.{ .register_offset = .{ + .reg = reg, + .off = off, + } }); + return; + }, + .register_offset => |reg_off| { + try self.freeValue(temp_tracking.long); + temp_tracking.* = InstTracking.init(.{ .register_offset = .{ + .reg = reg_off.reg, + .off = reg_off.off + off, + } }); + return; + }, + .lea_symbol => |sym_off| { + assert(std.meta.eql(temp_tracking.long.lea_symbol, sym_off)); + temp_tracking.* = InstTracking.init(.{ .lea_symbol = .{ + .sym_index = sym_off.sym_index, + .off = sym_off.off + off, + } }); + return; + }, + .lea_frame => |frame_addr| { + assert(std.meta.eql(temp_tracking.long.lea_frame, frame_addr)); + temp_tracking.* = InstTracking.init(.{ .lea_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + off, + } }); + return; + }, + } + }, + } + const new_temp = try temp.getOffset(off, self); + try temp.die(self); + temp.* = new_temp; + } + + fn getLimb(temp: Temp, limb_index: u28, self: *Self) !Temp { + const new_temp_index = self.next_temp_index; + self.temp_type[@intFromEnum(new_temp_index)] = Type.usize; + switch (temp.tracking(self).short) { + else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + .immediate => |imm| { + assert(limb_index == 0); + new_temp_index.tracking(self).* = InstTracking.init(.{ .immediate = imm }); + }, + .register => |reg| { + assert(limb_index == 0); + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try self.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), reg.to64()); + }, + .register_pair => |regs| { + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try self.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), regs[limb_index].to64()); + }, + .register_offset => |reg_off| { + assert(limb_index == 0); + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ + .base = .{ .reg = reg_off.reg.to64() }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = reg_off.off + @as(u31, limb_index) * 8, + } }, + }); + }, + .load_symbol => |sym_off| { + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try self.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ + .base = .{ .reloc = sym_off.sym_index }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = sym_off.off + @as(u31, limb_index) * 8, + } }, + }); + }, + .lea_symbol => |sym_off| { + assert(limb_index == 0); + new_temp_index.tracking(self).* = InstTracking.init(.{ .lea_symbol = sym_off }); + }, + .load_frame => |frame_addr| { + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try self.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ + .base = .{ .frame = frame_addr.index }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = frame_addr.off + @as(u31, limb_index) * 8, + } }, + }); + }, + .lea_frame => |frame_addr| { 
+ assert(limb_index == 0); + new_temp_index.tracking(self).* = InstTracking.init(.{ .lea_frame = frame_addr }); + }, + } + self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + return .{ .index = new_temp_index.toIndex() }; + } + + fn toLimb(temp: *Temp, limb_index: u28, self: *Self) !void { + switch (temp.unwrap(self)) { + .ref => {}, + .temp => |temp_index| { + const temp_tracking = temp_index.tracking(self); + switch (temp_tracking.short) { + else => {}, + .register, .lea_symbol, .lea_frame => { + assert(limb_index == 0); + self.temp_type[@intFromEnum(temp_index)] = Type.usize; + return; + }, + .register_pair => |regs| { + switch (temp_tracking.long) { + .none, .reserved_frame => {}, + else => temp_tracking.long = + temp_tracking.long.address().offset(@as(u31, limb_index) * 8).deref(), + } + for (regs, 0..) |reg, reg_index| if (reg_index != limb_index) + self.register_manager.freeReg(reg); + temp_tracking.* = InstTracking.init(.{ .register = regs[limb_index] }); + self.temp_type[@intFromEnum(temp_index)] = Type.usize; + return; + }, + .load_symbol => |sym_off| { + assert(std.meta.eql(temp_tracking.long.load_symbol, sym_off)); + temp_tracking.* = InstTracking.init(.{ .load_symbol = .{ + .sym_index = sym_off.sym_index, + .off = sym_off.off + @as(u31, limb_index) * 8, + } }); + self.temp_type[@intFromEnum(temp_index)] = Type.usize; + return; + }, + .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) { + assert(std.meta.eql(temp_tracking.long.load_frame, frame_addr)); + temp_tracking.* = InstTracking.init(.{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + @as(u31, limb_index) * 8, + } }); + self.temp_type[@intFromEnum(temp_index)] = Type.usize; + return; + }, + } + }, + } + const new_temp = try temp.getLimb(limb_index, self); + try temp.die(self); + temp.* = new_temp; + } + + fn toReg(temp: *Temp, new_reg: Register, self: *Self) !bool { + const val, const ty = switch (temp.unwrap(self)) { + .ref => |ref| .{ temp.tracking(self).short, self.typeOf(ref) }, + .temp => |temp_index| val: { + const temp_tracking = temp_index.tracking(self); + if (temp_tracking.short == .register and + temp_tracking.short.register == new_reg) return false; + break :val .{ temp_tracking.short, temp_index.typeOf(self) }; + }, + }; + const new_temp_index = self.next_temp_index; + self.temp_type[@intFromEnum(new_temp_index)] = ty; + try self.genSetReg(new_reg, ty, val, .{}); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try temp.die(self); + self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + temp.* = .{ .index = new_temp_index.toIndex() }; + return true; + } + + fn toAnyReg(temp: *Temp, self: *Self) !bool { + const val, const ty = switch (temp.unwrap(self)) { + .ref => |ref| .{ temp.tracking(self).short, self.typeOf(ref) }, + .temp => |temp_index| val: { + const temp_tracking = temp_index.tracking(self); + if (temp_tracking.short == .register) return false; + break :val .{ temp_tracking.short, temp_index.typeOf(self) }; + }, + }; + const new_temp_index = self.next_temp_index; + self.temp_type[@intFromEnum(new_temp_index)] = ty; + const new_reg = + try self.register_manager.allocReg(new_temp_index.toIndex(), self.regClassForType(ty)); + try self.genSetReg(new_reg, ty, val, .{}); + new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + try temp.die(self); + self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + temp.* = .{ .index = new_temp_index.toIndex() }; + return 
true; + } + + fn toPair(first_temp: *Temp, second_temp: *Temp, self: *Self) !void { + while (true) for ([_]*Temp{ first_temp, second_temp }) |part_temp| { + if (try part_temp.toAnyReg(self)) break; + } else break; + const first_temp_tracking = first_temp.unwrap(self).temp.tracking(self); + const second_temp_tracking = second_temp.unwrap(self).temp.tracking(self); + const result: MCValue = .{ .register_pair = .{ + first_temp_tracking.short.register, + second_temp_tracking.short.register, + } }; + const result_temp_index = self.next_temp_index; + const result_temp: Temp = .{ .index = result_temp_index.toIndex() }; + assert(self.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking)); + assert(self.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking)); + self.temp_type[@intFromEnum(result_temp_index)] = Type.slice_const_u8; + result_temp_index.tracking(self).* = InstTracking.init(result); + first_temp.* = result_temp; + } + + fn toLea(temp: *Temp, self: *Self) !bool { + switch (temp.tracking(self).short) { + .none, + .unreach, + .dead, + .undef, + .eflags, + .register_pair, + .register_overflow, + .elementwise_regs_then_frame, + .reserved_frame, + .air_ref, + => unreachable, // not a valid pointer + .immediate, + .register, + .register_offset, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + => return false, + .memory, + .indirect, + .load_symbol, + .load_direct, + .load_got, + .load_tlv, + .load_frame, + => return temp.toAnyReg(self), + .lea_symbol => |sym_off| { + const off = sym_off.off; + if (off == 0) return false; + try temp.toOffset(-off, self); + while (try temp.toAnyReg(self)) {} + try temp.toOffset(off, self); + return true; + }, + } + } + + fn load(ptr: *Temp, val_ty: Type, self: *Self) !Temp { + const val_abi_size: u32 = @intCast(val_ty.abiSize(self.pt.zcu)); + const val = try self.tempAlloc(val_ty); + switch (val.tracking(self).short) { + else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + .register => |val_reg| { + while (try ptr.toLea(self)) {} + switch (val_reg.class()) { + .general_purpose => try self.asmRegisterMemory( + .{ ._, .mov }, + registerAlias(val_reg, val_abi_size), + try ptr.tracking(self).short.deref().mem(self, self.memSize(val_ty)), + ), + else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + } + }, + .load_frame => |val_frame_addr| { + var val_ptr = try self.tempFromValue(Type.usize, .{ .lea_frame = val_frame_addr }); + var len = try self.tempFromValue(Type.usize, .{ .immediate = val_abi_size }); + try val_ptr.memcpy(ptr, &len, self); + try val_ptr.die(self); + try len.die(self); + }, + } + return val; + } + + fn store(ptr: *Temp, val: *Temp, self: *Self) !void { + const val_ty = val.typeOf(self); + const val_abi_size: u32 = @intCast(val_ty.abiSize(self.pt.zcu)); + val: switch (val.tracking(self).short) { + else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + .immediate => |imm| if (std.math.cast(i32, imm)) |s| { + while (try ptr.toLea(self)) {} + try self.asmMemoryImmediate( + .{ ._, .mov }, + try ptr.tracking(self).short.deref().mem(self, self.memSize(val_ty)), + .s(s), + ); + } else continue :val .{ .register = undefined }, + .register => { + while (try ptr.toLea(self) or try val.toAnyReg(self)) {} + const val_reg = val.tracking(self).short.register; + switch (val_reg.class()) { + .general_purpose => try self.asmMemoryRegister( + .{ ._, .mov }, + try ptr.tracking(self).short.deref().mem(self, self.memSize(val_ty)), + registerAlias(val_reg, val_abi_size), + 
), + else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + } + }, + } + } + + fn memcpy(dst: *Temp, src: *Temp, len: *Temp, self: *Self) !void { + while (true) for ([_]*Temp{ dst, src, len }, [_]Register{ .rdi, .rsi, .rcx }) |temp, reg| { + if (try temp.toReg(reg, self)) break; + } else break; + try self.asmOpOnly(.{ .@"rep _sb", .mov }); + } + + fn moveTo(temp: Temp, inst: Air.Inst.Index, self: *Self) !void { + if (self.liveness.isUnused(inst)) try temp.die(self) else switch (temp.unwrap(self)) { + .ref => { + const result = try self.allocRegOrMem(inst, true); + try self.genCopy(self.typeOfIndex(inst), result, temp.tracking(self).short, .{}); + tracking_log.debug("{} => {} (birth)", .{ inst, result }); + self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result)); + }, + .temp => |temp_index| { + const temp_tracking = temp_index.tracking(self); + tracking_log.debug("{} => {} (birth)", .{ inst, temp_tracking.short }); + self.inst_tracking.putAssumeCapacityNoClobber(inst, temp_tracking.*); + assert(self.reuseTemp(inst, temp_index.toIndex(), temp_tracking)); + }, + } + } + + fn die(temp: Temp, self: *Self) !void { + switch (temp.unwrap(self)) { + .ref => {}, + .temp => |temp_index| try temp_index.tracking(self).die(self, temp_index.toIndex()), + } + } + + const Index = enum(u4) { + _, + + fn toIndex(index: Index) Air.Inst.Index { + return Air.Inst.Index.fromTargetIndex(@intFromEnum(index)); + } + + fn fromIndex(index: Air.Inst.Index) Index { + return @enumFromInt(index.toTargetIndex()); + } + + fn tracking(index: Index, self: *Self) *InstTracking { + return &self.inst_tracking.values()[@intFromEnum(index)]; + } + + fn isValid(index: Index, self: *Self) bool { + return index.tracking(self).short != .dead; + } + + fn typeOf(index: Index, self: *Self) Type { + assert(index.isValid(self)); + return self.temp_type[@intFromEnum(index)]; + } + + const max = std.math.maxInt(@typeInfo(Index).@"enum".tag_type); + const Set = std.StaticBitSet(max); + const SafetySet = if (std.debug.runtime_safety) Set else struct { + inline fn initEmpty() @This() { + return .{}; + } + + inline fn isSet(_: @This(), index: usize) bool { + assert(index < max); + return true; + } + + inline fn set(_: @This(), index: usize) void { + assert(index < max); + } + + inline fn eql(_: @This(), _: @This()) bool { + return true; + } + }; + }; +}; + +fn resetTemps(self: *Self) void { + for (0..@intFromEnum(self.next_temp_index)) |temp_index| { + const temp: Temp.Index = @enumFromInt(temp_index); + assert(!temp.isValid(self)); + self.temp_type[temp_index] = undefined; + } + self.next_temp_index = @enumFromInt(0); +} + +fn reuseTemp( + self: *Self, + new_inst: Air.Inst.Index, + old_inst: Air.Inst.Index, + tracking: *InstTracking, +) bool { + switch (tracking.short) { + .register, + .register_pair, + .register_offset, + .register_overflow, + => for (tracking.short.getRegs()) |tracked_reg| { + if (RegisterManager.indexOfRegIntoTracked(tracked_reg)) |tracked_index| { + self.register_manager.registers[tracked_index] = new_inst; + } + }, + .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false, + else => {}, + } + switch (tracking.short) { + .eflags, .register_overflow => self.eflags_inst = new_inst, + else => {}, + } + tracking.reuse(self, new_inst, old_inst); + return true; +} + +fn tempAlloc(self: *Self, ty: Type) !Temp { + const temp_index = self.next_temp_index; + temp_index.tracking(self).* = InstTracking.init(try self.allocRegOrMemAdvanced(ty, temp_index.toIndex(), true)); + 
self.temp_type[@intFromEnum(temp_index)] = ty; + self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); + return .{ .index = temp_index.toIndex() }; +} + +fn tempFromValue(self: *Self, ty: Type, value: MCValue) !Temp { + const temp_index = self.next_temp_index; + temp_index.tracking(self).* = InstTracking.init(value); + self.temp_type[@intFromEnum(temp_index)] = ty; + try self.getValue(value, temp_index.toIndex()); + self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); + return .{ .index = temp_index.toIndex() }; +} + +fn tempFromOperand( + self: *Self, + inst: Air.Inst.Index, + op_index: Liveness.OperandInt, + op_ref: Air.Inst.Ref, +) !Temp { + const zcu = self.pt.zcu; + const ip = &zcu.intern_pool; + + if (!self.liveness.operandDies(inst, op_index)) { + if (op_ref.toIndex()) |op_inst| return .{ .index = op_inst }; + const val = op_ref.toInterned().?; + const gop = try self.const_tracking.getOrPut(self.gpa, val); + if (!gop.found_existing) gop.value_ptr.* = InstTracking.init(init: { + const const_mcv = try self.genTypedValue(Value.fromInterned(val)); + switch (const_mcv) { + .lea_tlv => |tlv_sym| switch (self.bin_file.tag) { + .elf, .macho => { + if (self.mod.pic) { + try self.spillRegisters(&.{ .rdi, .rax }); + } else { + try self.spillRegisters(&.{.rax}); + } + const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{ + .size = 8, + .alignment = .@"8", + })); + try self.genSetMem( + .{ .frame = frame_index }, + 0, + Type.usize, + .{ .lea_symbol = .{ .sym_index = tlv_sym } }, + .{}, + ); + break :init .{ .load_frame = .{ .index = frame_index } }; + }, + else => break :init const_mcv, + }, + else => break :init const_mcv, + } + }); + return self.tempFromValue(Type.fromInterned(ip.typeOf(val)), gop.value_ptr.short); + } + + const temp_index = self.next_temp_index; + const temp: Temp = .{ .index = temp_index.toIndex() }; + const op_inst = op_ref.toIndex().?; + const tracking = self.getResolvedInstValue(op_inst); + temp_index.tracking(self).* = tracking.*; + if (!self.reuseTemp(temp.index, op_inst, tracking)) return .{ .index = op_ref.toIndex().? 
}; + self.temp_type[@intFromEnum(temp_index)] = self.typeOf(op_ref); + self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); + return temp; +} + +inline fn tempsFromOperands(self: *Self, inst: Air.Inst.Index, op_refs: anytype) ![op_refs.len]Temp { + var temps: [op_refs.len]Temp = undefined; + inline for (&temps, 0.., op_refs) |*temp, op_index, op_ref| { + temp.* = try self.tempFromOperand(inst, op_index, op_ref); + } + return temps; +} diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index d825b70cf6..85ece4f93c 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -250,9 +250,8 @@ pub fn classifySystemV(ty: Type, zcu: *Zcu, target: std.Target, ctx: Context) [8 return memory_class; }, .optional => { - if (ty.isPtrLikeOptional(zcu)) { - result[0] = .integer; - return result; + if (ty.optionalReprIsPayload(zcu)) { + return classifySystemV(ty.optionalChild(zcu), zcu, target, ctx); } return memory_class; }, diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 95397e8064..ac5181cb3e 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -547,7 +547,39 @@ pub const Memory = struct { } }; - pub const Scale = enum(u2) { @"1", @"2", @"4", @"8" }; + pub const Scale = enum(u2) { + @"1", + @"2", + @"4", + @"8", + + pub fn fromFactor(factor: u4) Scale { + return switch (factor) { + else => unreachable, + 1 => .@"1", + 2 => .@"2", + 4 => .@"4", + 8 => .@"8", + }; + } + + pub fn toFactor(scale: Scale) u4 { + return switch (scale) { + .@"1" => 1, + .@"2" => 2, + .@"4" => 4, + .@"8" => 8, + }; + } + + pub fn fromLog2(log2: u2) Scale { + return @enumFromInt(log2); + } + + pub fn toLog2(scale: Scale) u2 { + return @intFromEnum(scale); + } + }; }; pub const Immediate = union(enum) { diff --git a/src/print_air.zig b/src/print_air.zig index 280d05edfa..d99be7770d 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -96,8 +96,8 @@ const Writer = struct { fn writeInst(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { const tag = w.air.instructions.items(.tag)[@intFromEnum(inst)]; try s.writeByteNTimes(' ', w.indent); - try s.print("%{d}{c}= {s}(", .{ - @intFromEnum(inst), + try s.print("{}{c}= {s}(", .{ + inst, @as(u8, if (if (w.liveness) |liveness| liveness.isUnused(inst) else false) '!' else ' '), @tagName(tag), }); @@ -409,7 +409,7 @@ const Writer = struct { try s.writeAll("}"); for (liveness_block.deaths) |operand| { - try s.print(" %{d}!", .{@intFromEnum(operand)}); + try s.print(" {}!", .{operand}); } } @@ -728,7 +728,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (liveness_condbr.else_deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{@intFromEnum(operand)}); + try s.print("{}!", .{operand}); } try s.writeAll("\n"); } @@ -739,7 +739,7 @@ const Writer = struct { try s.writeAll("}"); for (liveness_condbr.then_deaths) |operand| { - try s.print(" %{d}!", .{@intFromEnum(operand)}); + try s.print(" {}!", .{operand}); } } @@ -765,7 +765,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (liveness_condbr.else_deaths, 0..) 
|operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{@intFromEnum(operand)}); + try s.print("{}!", .{operand}); } try s.writeAll("\n"); } @@ -776,7 +776,7 @@ const Writer = struct { try s.writeAll("}"); for (liveness_condbr.then_deaths) |operand| { - try s.print(" %{d}!", .{@intFromEnum(operand)}); + try s.print(" {}!", .{operand}); } } @@ -807,7 +807,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (liveness_condbr.then_deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{@intFromEnum(operand)}); + try s.print("{}!", .{operand}); } try s.writeAll("\n"); } @@ -827,7 +827,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (liveness_condbr.else_deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{@intFromEnum(operand)}); + try s.print("{}!", .{operand}); } try s.writeAll("\n"); } @@ -884,7 +884,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{@intFromEnum(operand)}); + try s.print("{}!", .{operand}); } try s.writeAll("\n"); } @@ -910,7 +910,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{@intFromEnum(operand)}); + try s.print("{}!", .{operand}); } try s.writeAll("\n"); } @@ -994,7 +994,7 @@ const Writer = struct { dies: bool, ) @TypeOf(s).Error!void { _ = w; - try s.print("%{d}", .{@intFromEnum(inst)}); + try s.print("{}", .{inst}); if (dies) try s.writeByte('!'); } diff --git a/tools/lldb_pretty_printers.py b/tools/lldb_pretty_printers.py index fd7e9fa08a..d3ccf738e8 100644 --- a/tools/lldb_pretty_printers.py +++ b/tools/lldb_pretty_printers.py @@ -383,7 +383,7 @@ def InstRef_SummaryProvider(value, _=None): 'InternPool.Index(%d)' % value.unsigned if value.unsigned < 0x80000000 else 'instructions[%d]' % (value.unsigned - 0x80000000)) def InstIndex_SummaryProvider(value, _=None): - return 'instructions[%d]' % value.unsigned + return 'instructions[%d]' % value.unsigned if value.unsigned < 0x80000000 else 'temps[%d]' % (value.unsigned - 0x80000000) class zig_DeclIndex_SynthProvider: def __init__(self, value, _=None): self.value = value From beadf702b8d0421cc84c47ebc9644ce07e22c306 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 8 Nov 2024 08:32:00 -0500 Subject: [PATCH 02/25] x86_64: rewrite arithmetic --- lib/std/mem.zig | 4 +- src/arch/x86_64/CodeGen.zig | 1024 +++++++++++++++++++++++------------ src/arch/x86_64/Mir.zig | 2 +- src/register_manager.zig | 22 +- 4 files changed, 693 insertions(+), 359 deletions(-) diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 28288620f6..9e3f4f3936 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -883,8 +883,8 @@ fn SliceTo(comptime T: type, comptime end: std.meta.Elem(T)) type { @compileError("invalid type given to std.mem.sliceTo: " ++ @typeName(T)); } -/// Takes an array, a pointer to an array, a sentinel-terminated pointer, or a slice and -/// iterates searching for the first occurrence of `end`, returning the scanned slice. +/// Takes a pointer to an array, a sentinel-terminated pointer, or a slice and iterates searching for +/// the first occurrence of `end`, returning the scanned slice. /// If `end` is not found, the full length of the array/slice/sentinel terminated pointer is returned. 
/// If the pointer type is sentinel terminated and `end` matches that terminator, the /// resulting slice is also sentinel terminated. diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 3bbc80999a..80991c7aa6 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -767,7 +767,7 @@ const FrameAlloc = struct { .size = spill_size, .pad = @intCast(spill_size - abi_size), .alignment = ty.abiAlignment(zcu).maxStrict( - InternPool.Alignment.fromNonzeroByteUnits(@min(spill_size, 8)), + .fromNonzeroByteUnits(@min(spill_size, 8)), ), }); } @@ -806,7 +806,7 @@ pub fn generate( const gpa = zcu.gpa; const ip = &zcu.intern_pool; const func = zcu.funcInfo(func_index); - const fn_type = Type.fromInterned(func.ty); + const fn_type: Type = .fromInterned(func.ty); const mod = zcu.navFileScope(func.owner_nav).mod; var function: Self = .{ @@ -846,7 +846,7 @@ pub fn generate( try function.inst_tracking.ensureTotalCapacity(gpa, Temp.Index.max); for (0..Temp.Index.max) |temp_index| { const temp: Temp.Index = @enumFromInt(temp_index); - function.inst_tracking.putAssumeCapacityNoClobber(temp.toIndex(), InstTracking.init(.none)); + function.inst_tracking.putAssumeCapacityNoClobber(temp.toIndex(), .init(.none)); } wip_mir_log.debug("{}:", .{fmtNav(func.owner_nav, ip)}); @@ -854,11 +854,11 @@ pub fn generate( try function.frame_allocs.resize(gpa, FrameIndex.named_count); function.frame_allocs.set( @intFromEnum(FrameIndex.stack_frame), - FrameAlloc.init(.{ .size = 0, .alignment = .@"1" }), + .init(.{ .size = 0, .alignment = .@"1" }), ); function.frame_allocs.set( @intFromEnum(FrameIndex.call_frame), - FrameAlloc.init(.{ .size = 0, .alignment = .@"1" }), + .init(.{ .size = 0, .alignment = .@"1" }), ); const fn_info = zcu.typeToFunc(fn_type).?; @@ -871,20 +871,19 @@ pub fn generate( function.args = call_info.args; function.ret_mcv = call_info.return_value; - function.frame_allocs.set(@intFromEnum(FrameIndex.ret_addr), FrameAlloc.init(.{ + function.frame_allocs.set(@intFromEnum(FrameIndex.ret_addr), .init(.{ .size = Type.usize.abiSize(zcu), .alignment = Type.usize.abiAlignment(zcu).min(call_info.stack_align), })); - function.frame_allocs.set(@intFromEnum(FrameIndex.base_ptr), FrameAlloc.init(.{ + function.frame_allocs.set(@intFromEnum(FrameIndex.base_ptr), .init(.{ .size = Type.usize.abiSize(zcu), - .alignment = InternPool.Alignment.min( - call_info.stack_align, - InternPool.Alignment.fromNonzeroByteUnits(function.target.stackAlignment()), + .alignment = call_info.stack_align.min( + .fromNonzeroByteUnits(function.target.stackAlignment()), ), })); function.frame_allocs.set( @intFromEnum(FrameIndex.args_frame), - FrameAlloc.init(.{ + .init(.{ .size = call_info.stack_byte_count, .alignment = call_info.stack_align, }), @@ -1242,6 +1241,80 @@ fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { return result; } +fn asmOps(self: *Self, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void { + return switch (ops[0]) { + .none => self.asmOpOnly(tag), + .reg => |reg0| switch (ops[1]) { + .none => self.asmRegister(tag, reg0), + .reg => |reg1| switch (ops[2]) { + .none => self.asmRegisterRegister(tag, reg0, reg1), + .reg => |reg2| switch (ops[3]) { + .none => self.asmRegisterRegisterRegister(tag, reg0, reg1, reg2), + .reg => |reg3| self.asmRegisterRegisterRegisterRegister(tag, reg0, reg1, reg2, reg3), + .imm => |imm3| self.asmRegisterRegisterRegisterImmediate(tag, reg0, reg1, reg2, imm3), + else => error.InvalidInstruction, + }, + .mem => |mem2| switch (ops[3]) { + .none => 
self.asmRegisterRegisterMemory(tag, reg0, reg1, mem2), + .reg => |reg3| self.asmRegisterRegisterMemoryRegister(tag, reg0, reg1, mem2, reg3), + .imm => |imm3| self.asmRegisterRegisterMemoryImmediate(tag, reg0, reg1, mem2, imm3), + else => error.InvalidInstruction, + }, + .imm => |imm2| switch (ops[3]) { + .none => self.asmRegisterRegisterImmediate(tag, reg0, reg1, imm2), + else => error.InvalidInstruction, + }, + else => error.InvalidInstruction, + }, + .mem => |mem1| switch (ops[2]) { + .none => self.asmRegisterMemory(tag, reg0, mem1), + .reg => |reg2| switch (ops[3]) { + .none => self.asmRegisterMemoryRegister(tag, reg0, mem1, reg2), + else => error.InvalidInstruction, + }, + .imm => |imm2| switch (ops[3]) { + .none => self.asmRegisterMemoryImmediate(tag, reg0, mem1, imm2), + else => error.InvalidInstruction, + }, + else => error.InvalidInstruction, + }, + .imm => |imm1| switch (ops[2]) { + .none => self.asmRegisterImmediate(tag, reg0, imm1), + else => error.InvalidInstruction, + }, + else => error.InvalidInstruction, + }, + .mem => |mem0| switch (ops[1]) { + .none => self.asmMemory(tag, mem0), + .reg => |reg1| switch (ops[2]) { + .none => self.asmMemoryRegister(tag, mem0, reg1), + .reg => |reg2| switch (ops[3]) { + .none => self.asmMemoryRegisterRegister(tag, mem0, reg1, reg2), + else => error.InvalidInstruction, + }, + .imm => |imm2| switch (ops[3]) { + .none => self.asmMemoryRegisterImmediate(tag, mem0, reg1, imm2), + else => error.InvalidInstruction, + }, + else => error.InvalidInstruction, + }, + .imm => |imm1| switch (ops[2]) { + .none => self.asmMemoryImmediate(tag, mem0, imm1), + else => error.InvalidInstruction, + }, + else => error.InvalidInstruction, + }, + .imm => |imm0| switch (ops[1]) { + .none => self.asmImmediate(tag, imm0), + else => error.InvalidInstruction, + }, + .inst => |inst0| switch (ops[1]) { + .none => self.asmReloc(tag, inst0), + else => error.InvalidInstruction, + }, + }; +} + /// A `cc` of `.z_and_np` clobbers `reg2`! 
fn asmCmovccRegisterRegister(self: *Self, cc: Condition, reg1: Register, reg2: Register) !void { _ = try self.addInst(.{ @@ -1256,7 +1329,7 @@ fn asmCmovccRegisterRegister(self: *Self, cc: Condition, reg1: Register, reg2: R }, .data = .{ .rr = .{ .fixes = switch (cc) { - else => Mir.Inst.Fixes.fromCondition(cc), + else => .fromCondition(cc), .z_and_np, .nz_or_p => ._, }, .r1 = reg1, @@ -1280,7 +1353,7 @@ fn asmCmovccRegisterMemory(self: *Self, cc: Condition, reg: Register, m: Memory) }, .data = .{ .rx = .{ .fixes = switch (cc) { - else => Mir.Inst.Fixes.fromCondition(cc), + else => .fromCondition(cc), .z_and_np => unreachable, .nz_or_p => ._, }, @@ -1303,7 +1376,7 @@ fn asmSetccRegister(self: *Self, cc: Condition, reg: Register) !void { }, .data = switch (cc) { else => .{ .r = .{ - .fixes = Mir.Inst.Fixes.fromCondition(cc), + .fixes = .fromCondition(cc), .r1 = reg, } }, .z_and_np, .nz_or_p => .{ .rr = .{ @@ -1328,7 +1401,7 @@ fn asmSetccMemory(self: *Self, cc: Condition, m: Memory) !void { }, .data = switch (cc) { else => .{ .x = .{ - .fixes = Mir.Inst.Fixes.fromCondition(cc), + .fixes = .fromCondition(cc), .payload = payload, } }, .z_and_np, .nz_or_p => .{ .rx = .{ @@ -1362,7 +1435,7 @@ fn asmJccReloc(self: *Self, cc: Condition, target: Mir.Inst.Index) !Mir.Inst.Ind }, .data = .{ .inst = .{ .fixes = switch (cc) { - else => Mir.Inst.Fixes.fromCondition(cc), + else => .fromCondition(cc), .z_and_np, .nz_or_p => ._, }, .inst = target, @@ -1969,7 +2042,7 @@ fn gen(self: *Self) InnerError!void { // The address where to store the return value for the caller is in a // register which the callee is free to clobber. Therefore, we purposely // spill it to stack immediately. - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(Type.usize, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(Type.usize, zcu)); try self.genSetMem( .{ .frame = frame_index }, 0, @@ -1986,7 +2059,7 @@ fn gen(self: *Self) InnerError!void { if (fn_info.is_var_args) switch (cc) { .x86_64_sysv => { const info = &self.va_info.sysv; - const reg_save_area_fi = try self.allocFrameIndex(FrameAlloc.init(.{ + const reg_save_area_fi = try self.allocFrameIndex(.init(.{ .size = abi.SysV.c_abi_int_param_regs.len * 8 + abi.SysV.c_abi_sse_param_regs.len * 16, .alignment = .@"16", @@ -2192,7 +2265,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { wip_mir_log.debug("{}", .{self.fmtAir(inst)}); verbose_tracking_log.debug("{}", .{self.fmtTracking()}); - self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); + self.reused_operands = .initEmpty(); try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); try self.airArg(inst); @@ -2210,7 +2283,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { wip_mir_log.debug("{}", .{self.fmtAir(inst)}); verbose_tracking_log.debug("{}", .{self.fmtTracking()}); - self.reused_operands = @TypeOf(self.reused_operands).initEmpty(); + self.reused_operands = .initEmpty(); try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off @@ -2223,9 +2296,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .sub_wrap, .bool_and, .bool_or, - .bit_and, - .bit_or, - .xor, .min, .max, => |tag| try self.airBinOp(inst, tag), @@ -2391,6 +2461,55 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { try slot.moveTo(inst, self); }, .assembly => try self.airAsm(inst), + inline .bit_and, .bit_or, .xor => |tag| if (use_old or 
self.typeOfIndex(inst).abiSize(zcu) > @as( + u64, + if (!self.typeOfIndex(inst).isVector(zcu)) + 8 + else if (!self.hasFeature(.avx2)) + 16 + else + 32, + )) try self.airBinOp(inst, tag) else { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + try self.spillEflagsIfOccupied(); + const mir_tag: Mir.Inst.Tag = switch (tag) { + else => unreachable, + .bit_and => .@"and", + .bit_or => .@"or", + .xor => .xor, + }; + var res: [1]Temp = undefined; + try self.select(&res, &.{ &ops[0], &ops[1] }, &.{ + .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .ymm, .ymm, .mem }, .features = &.{.avx2} }, + .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .ymm, .mem, .ymm }, .commute = .{ 1, 2 }, .features = &.{.avx2} }, + .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .ymm, .ymm, .ymm }, .features = &.{.avx2} }, + .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .xmm, .xmm, .mem }, .features = &.{.avx} }, + .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .xmm, .mem, .xmm }, .commute = .{ 1, 2 }, .features = &.{.avx} }, + .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .xmm, .xmm, .xmm }, .features = &.{.avx} }, + .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .xmm, .{ .match = 0 }, .mem }, .features = &.{.sse2} }, + .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .xmm, .mem, .{ .match = 0 } }, .features = &.{.sse2} }, + .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .xmm, .{ .match = 0 }, .xmm }, .features = &.{.sse2} }, + .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .xmm, .xmm, .{ .match = 0 } }, .features = &.{.sse2} }, + .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .mm, .{ .match = 0 }, .mem }, .features = &.{.mmx} }, + .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .mm, .mem, .{ .match = 0 } }, .features = &.{.mmx} }, + .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .mm, .{ .match = 0 }, .mm }, .features = &.{.mmx} }, + .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .mm, .mm, .{ .match = 0 } }, .features = &.{.mmx} }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .mem, .{ .match = 0 }, .simm32 } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .mem, .simm32, .{ .match = 0 } } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .mem, .{ .match = 0 }, .gpr } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .mem, .gpr, .{ .match = 0 } } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .{ .match = 0 }, .simm32 } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .simm32, .{ .match = 0 } } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .{ .match = 0 }, .mem } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .mem, .{ .match = 0 } } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .{ .match = 0 }, .gpr } }, + .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .gpr, .{ .match = 0 } } }, + }); + if (ops[0].index != res[0].index) try ops[0].die(self); + if (ops[1].index != res[0].index) try ops[1].die(self); + try res[0].moveTo(inst, self); + }, .block => if (use_old) try self.airBlock(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.Block, ty_pl.payload); @@ -2506,7 +2625,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { else if (opt_child_ty.isSlice(zcu)) .qword else - Memory.Size.fromSize(opt_child_abi_size)), + .fromSize(opt_child_abi_size)), .u(0), ); var is_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e }); @@ -2529,7 +2648,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { else if (opt_child_ty.isSlice(zcu)) .qword else - Memory.Size.fromSize(opt_child_abi_size)), + .fromSize(opt_child_abi_size)), .u(0), ); var 
is_non_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne }); @@ -2646,31 +2765,51 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.StructField, ty_pl.payload).data; var ops = try self.tempsFromOperands(inst, .{extra.struct_operand}); - try ops[0].toOffset(self.fieldOffset(self.typeOf(extra.struct_operand), self.typeOfIndex(inst), extra.field_index), self); + try ops[0].toOffset(self.fieldOffset( + self.typeOf(extra.struct_operand), + self.typeOfIndex(inst), + extra.field_index, + ), self); try ops[0].moveTo(inst, self); }, .struct_field_ptr_index_0 => if (use_old) try self.airStructFieldPtrIndex(inst, 0) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(self.fieldOffset(self.typeOf(ty_op.operand), self.typeOfIndex(inst), 0), self); + try ops[0].toOffset(self.fieldOffset( + self.typeOf(ty_op.operand), + self.typeOfIndex(inst), + 0, + ), self); try ops[0].moveTo(inst, self); }, .struct_field_ptr_index_1 => if (use_old) try self.airStructFieldPtrIndex(inst, 1) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(self.fieldOffset(self.typeOf(ty_op.operand), self.typeOfIndex(inst), 1), self); + try ops[0].toOffset(self.fieldOffset( + self.typeOf(ty_op.operand), + self.typeOfIndex(inst), + 1, + ), self); try ops[0].moveTo(inst, self); }, .struct_field_ptr_index_2 => if (use_old) try self.airStructFieldPtrIndex(inst, 2) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(self.fieldOffset(self.typeOf(ty_op.operand), self.typeOfIndex(inst), 2), self); + try ops[0].toOffset(self.fieldOffset( + self.typeOf(ty_op.operand), + self.typeOfIndex(inst), + 2, + ), self); try ops[0].moveTo(inst, self); }, .struct_field_ptr_index_3 => if (use_old) try self.airStructFieldPtrIndex(inst, 3) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(self.fieldOffset(self.typeOf(ty_op.operand), self.typeOfIndex(inst), 3), self); + try ops[0].toOffset(self.fieldOffset( + self.typeOf(ty_op.operand), + self.typeOfIndex(inst), + 3, + ), self); try ops[0].moveTo(inst, self); }, .slice => if (use_old) try self.airSlice(inst) else { @@ -2775,7 +2914,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; var ops = try self.tempsFromOperands(inst, .{extra.field_ptr}); - try ops[0].toOffset(-self.fieldOffset(self.typeOfIndex(inst), self.typeOf(extra.field_ptr), extra.field_index), self); + try ops[0].toOffset(-self.fieldOffset( + self.typeOfIndex(inst), + self.typeOf(extra.field_ptr), + extra.field_index, + ), self); try ops[0].moveTo(inst, self); }, @@ -2813,7 +2956,7 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { const ip = &zcu.intern_pool; switch (Type.fromInterned(lazy_sym.ty).zigTypeTag(zcu)) { .@"enum" => { - const enum_ty = Type.fromInterned(lazy_sym.ty); + const enum_ty: Type = .fromInterned(lazy_sym.ty); wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(pt)}); const resolved_cc = abi.resolveCallingConvention(.auto, self.target.*); @@ -2919,7 +3062,7 @@ fn 
finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void { else => unreachable, // Why didn't the result die? } else { tracking_log.debug("{} => {} (birth)", .{ inst, result }); - self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result)); + self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result)); // In some cases, an operand may be reused as the result. // If that operand died and was a register, it was freed by // processDeath, so we have to "re-allocate" the register. @@ -3067,7 +3210,7 @@ fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex { const zcu = pt.zcu; const ptr_ty = self.typeOfIndex(inst); const val_ty = ptr_ty.childType(zcu); - return self.allocFrameIndex(FrameAlloc.init(.{ + return self.allocFrameIndex(.init(.{ .size = std.math.cast(u32, val_ty.abiSize(zcu)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(pt)}); }, @@ -3107,35 +3250,49 @@ fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: b }, else => 8, })) { - if (self.register_manager.tryAllocReg(inst, self.regClassForType(ty))) |reg| { + if (self.register_manager.tryAllocReg(inst, self.regSetForType(ty))) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } } - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(ty, zcu)); return .{ .load_frame = .{ .index = frame_index } }; } -fn regClassForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet { +fn regClassForType(self: *Self, ty: Type) Register.Class { const pt = self.pt; const zcu = pt.zcu; return switch (ty.zigTypeTag(zcu)) { .float => switch (ty.floatBits(self.target.*)) { - 80 => abi.RegisterClass.x87, - else => abi.RegisterClass.sse, + 80 => .x87, + else => .sse, }, .vector => switch (ty.childType(zcu).toIntern()) { - .bool_type, .u1_type => abi.RegisterClass.gp, + .bool_type, .u1_type => .general_purpose, else => if (ty.isAbiInt(zcu) and ty.intInfo(zcu).bits == 1) - abi.RegisterClass.gp + .general_purpose else - abi.RegisterClass.sse, + .sse, }, - else => abi.RegisterClass.gp, + else => .general_purpose, }; } +fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet { + return switch (rc) { + .general_purpose => abi.RegisterClass.gp, + .segment, .ip => unreachable, + .x87 => abi.RegisterClass.x87, + .mmx => @panic("TODO"), + .sse => abi.RegisterClass.sse, + }; +} + +fn regSetForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet { + return regSetForRegClass(self.regClassForType(ty)); +} + const State = struct { registers: RegisterManager.TrackedRegisters, reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking, @@ -3292,7 +3449,7 @@ pub fn spillRegisters(self: *Self, comptime registers: []const Register) !void { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. 
fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg = try self.register_manager.allocReg(null, self.regClassForType(ty)); + const reg = try self.register_manager.allocReg(null, self.regSetForType(ty)); try self.genSetReg(reg, ty, mcv, .{}); return reg; } @@ -3307,7 +3464,7 @@ fn copyToRegisterWithInstTracking( ty: Type, mcv: MCValue, ) !MCValue { - const reg: Register = try self.register_manager.allocReg(reg_owner, self.regClassForType(ty)); + const reg: Register = try self.register_manager.allocReg(reg_owner, self.regSetForType(ty)); try self.genSetReg(reg, ty, mcv, .{}); return MCValue{ .register = reg }; } @@ -3910,7 +4067,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { .storage = .{ .repeated_elem = mask_val.ip_index }, } }); - const splat_mcv = try self.genTypedValue(Value.fromInterned(splat_val)); + const splat_mcv = try self.genTypedValue(.fromInterned(splat_val)); const splat_addr_mcv: MCValue = switch (splat_mcv) { .memory, .indirect, .load_frame => splat_mcv.address(), else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) }, @@ -3923,7 +4080,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { .{ .vp_, .@"and" }, dst_alias, dst_alias, - try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)), + try splat_addr_mcv.deref().mem(self, .fromSize(splat_abi_size)), ); if (src_abi_size > 16) { const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); @@ -3947,7 +4104,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .{ .p_, .@"and" }, dst_alias, - try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)), + try splat_addr_mcv.deref().mem(self, .fromSize(splat_abi_size)), ); try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias); } @@ -3992,7 +4149,7 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const slice_ty = self.typeOfIndex(inst); - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu)); const ptr_ty = self.typeOf(bin_op.lhs); try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, .{ .air_ref = bin_op.lhs }, .{}); @@ -4129,7 +4286,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { state: State, reloc: Mir.Inst.Index, } = if (signed and tag == .div_floor) state: { - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(Type.usize, zcu)); + const frame_index = try self.allocFrameIndex(.initType(Type.usize, zcu)); try self.asmMemoryImmediate( .{ ._, .mov }, .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } }, @@ -4259,7 +4416,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); const tmp_regs = - try self.register_manager.allocRegs(2, .{null} ** 2, abi.RegisterClass.gp); + try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs); defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); @@ -4698,8 +4855,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { else => {}, } - const frame_index = - try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); try self.genSetMem( .{ .frame = frame_index }, 
@intCast(tuple_ty.structFieldOffset(1, zcu)), @@ -4717,8 +4873,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { break :result .{ .load_frame = .{ .index = frame_index } }; } - const frame_index = - try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, @@ -4776,8 +4931,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { else => {}, } - const frame_index = - try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); try self.genSetMem( .{ .frame = frame_index }, @intCast(tuple_ty.structFieldOffset(1, zcu)), @@ -4796,7 +4950,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } const frame_index = - try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, zcu)); + try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, @@ -4833,7 +4987,7 @@ fn genSetFrameTruncatedOverflowCompare( const rest_ty = try pt.intType(.unsigned, int_info.bits - hi_bits); const temp_regs = - try self.register_manager.allocRegs(3, .{null} ** 3, abi.RegisterClass.gp); + try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp); const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs); defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); @@ -4916,7 +5070,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const rhs_mcv = try self.resolveInst(bin_op.rhs); const temp_regs = - try self.register_manager.allocRegs(4, .{null} ** 4, abi.RegisterClass.gp); + try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs); defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); @@ -5080,7 +5234,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); const tmp_regs = - try self.register_manager.allocRegs(4, .{null} ** 4, abi.RegisterClass.gp); + try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); const tmp_locks = self.register_manager.lockRegsAssumeUnused(4, tmp_regs); defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); @@ -5217,7 +5371,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { self.eflags_inst = inst; break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; } else { - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, @@ -5228,7 +5382,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { src_ty.fmt(pt), dst_ty.fmt(pt), }); - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(tuple_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); if (dst_info.bits >= lhs_active_bits + rhs_active_bits) { try self.genSetMem( .{ .frame = frame_index }, @@ -5301,10 +5455,7 @@ fn 
genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue }; switch (mat_rhs) { .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)), - .memory, .indirect, .load_frame => try self.asmMemory( - tag, - try mat_rhs.mem(self, Memory.Size.fromSize(abi_size)), - ), + .memory, .indirect, .load_frame => try self.asmMemory(tag, try mat_rhs.mem(self, .fromSize(abi_size))), else => unreachable, } if (tag[1] == .div and bit_size == 8) try self.asmRegisterRegister(.{ ._, .mov }, .dl, .ah); @@ -5575,7 +5726,7 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(shift_lock); const mask_ty = try pt.vectorType(.{ .len = 16, .child = .u8_type }); - const mask_mcv = try self.genTypedValue(Value.fromInterned(try pt.intern(.{ .aggregate = .{ + const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ .ty = mask_ty.toIntern(), .storage = .{ .elems = &([1]InternPool.Index{ (try rhs_ty.childType(zcu).maxIntScalar(pt, Type.u8)).toIntern(), @@ -5811,7 +5962,7 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { .{ .base = .{ .reg = src_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(err_abi_size), + .size = .fromSize(err_abi_size), .disp = err_off, } }, }, @@ -5853,7 +6004,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { .{ .base = .{ .reg = src_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(err_abi_size), + .size = .fromSize(err_abi_size), .disp = err_off, } }, }, @@ -5909,7 +6060,7 @@ fn genUnwrapErrUnionPayloadMir( const eu_lock = self.register_manager.lockReg(reg); defer if (eu_lock) |lock| self.register_manager.unlockReg(lock); - const payload_in_gp = self.regClassForType(payload_ty).supersetOf(abi.RegisterClass.gp); + const payload_in_gp = self.regSetForType(payload_ty).supersetOf(abi.RegisterClass.gp); const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null) try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union) else @@ -6043,7 +6194,7 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = result: { if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .{ .immediate = 0 }; - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(eu_ty, zcu)); const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand, .{}); @@ -6066,7 +6217,7 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = result: { if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result try self.resolveInst(ty_op.operand); - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(eu_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(eu_ty, zcu)); const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef, .{}); @@ -6297,7 +6448,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { index_reg.to64(), ), .sse => { - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu)); try self.genSetMem(.{ .frame = frame_index }, 0, 
array_ty, array_mcv, .{}); try self.asmMemoryRegister( .{ ._, .bt }, @@ -6342,7 +6493,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { switch (array_mcv) { .register => { - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu)); try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mcv, .{}); try self.asmRegisterMemory( .{ ._, .lea }, @@ -7175,7 +7326,7 @@ fn genByteSwap( const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; const temp_regs = - try self.register_manager.allocRegs(4, .{null} ** 4, abi.RegisterClass.gp); + try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs); defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); @@ -7472,11 +7623,11 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) else => unreachable, }); const sign_mem: Memory = if (sign_mcv.isMemory()) - try sign_mcv.mem(self, Memory.Size.fromSize(abi_size)) + try sign_mcv.mem(self, .fromSize(abi_size)) else .{ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) }, - .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size) } }, + .mod = .{ .rm = .{ .size = .fromSize(abi_size) } }, }; if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory( @@ -7657,7 +7808,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro mir_tag, dst_alias, dst_alias, - try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), + try src_mcv.mem(self, .fromSize(abi_size)), .u(@as(u5, @bitCast(mode))), ) else try self.asmRegisterRegisterRegisterImmediate( mir_tag, @@ -7672,7 +7823,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( mir_tag, dst_alias, - try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), + try src_mcv.mem(self, .fromSize(abi_size)), .u(@as(u5, @bitCast(mode))), ) else try self.asmRegisterRegisterImmediate( mir_tag, @@ -7714,7 +7865,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( .l, registerAlias(dst_mcv.register, cmov_abi_size), - try src_mcv.mem(self, Memory.Size.fromSize(cmov_abi_size)), + try src_mcv.mem(self, .fromSize(cmov_abi_size)), ), else => { const val_reg = try self.copyToTmpRegister(ty, src_mcv); @@ -7767,7 +7918,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable; const tmp_regs = - try self.register_manager.allocRegs(3, .{null} ** 3, abi.RegisterClass.gp); + try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp); const tmp_locks = self.register_manager.lockRegsAssumeUnused(3, tmp_regs); defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); @@ -7868,7 +8019,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv.getReg().? 
else - try self.register_manager.allocReg(inst, self.regClassForType(ty)); + try self.register_manager.allocReg(inst, self.regSetForType(ty)); const dst_alias = registerAlias(dst_reg, abi_size); if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, @@ -7980,7 +8131,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, wide_reg, - try src_mcv.mem(self, Memory.Size.fromSize( + try src_mcv.mem(self, .fromSize( @intCast(@divExact(wide_reg.bitSize(), 16)), )), ) else try self.asmRegisterRegister( @@ -8026,7 +8177,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { mir_tag, dst_reg, dst_reg, - try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), + try src_mcv.mem(self, .fromSize(abi_size)), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_reg, @@ -8039,7 +8190,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { else => if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_reg, - try src_mcv.mem(self, Memory.Size.fromSize(abi_size)), + try src_mcv.mem(self, .fromSize(abi_size)), ) else try self.asmRegisterRegister( mir_tag, dst_reg, @@ -8136,7 +8287,7 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn const zcu = pt.zcu; const ptr_info = ptr_ty.ptrInfo(zcu); - const val_ty = Type.fromInterned(ptr_info.child); + const val_ty: Type = .fromInterned(ptr_info.child); if (!val_ty.hasRuntimeBitsIgnoreComptime(zcu)) return; const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu)); @@ -8204,7 +8355,7 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{ .base = .{ .reg = ptr_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(load_abi_size), + .size = .fromSize(load_abi_size), .disp = val_byte_off, } }, }); @@ -8220,14 +8371,14 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn try self.asmRegisterMemory(.{ ._, .mov }, dst_alias, .{ .base = .{ .reg = ptr_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(val_abi_size), + .size = .fromSize(val_abi_size), .disp = val_byte_off, } }, }); try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{ .base = .{ .reg = ptr_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(val_abi_size), + .size = .fromSize(val_abi_size), .disp = val_byte_off + limb_abi_size, } }, }); @@ -8297,8 +8448,8 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { const ptr_ty = self.typeOf(ty_op.operand); const elem_size = elem_ty.abiSize(zcu); - const elem_rc = self.regClassForType(elem_ty); - const ptr_rc = self.regClassForType(ptr_ty); + const elem_rc = self.regSetForType(elem_ty); + const ptr_rc = self.regSetForType(ptr_ty); const ptr_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and @@ -8345,7 +8496,7 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In const pt = self.pt; const zcu = pt.zcu; const ptr_info = ptr_ty.ptrInfo(zcu); - const src_ty = Type.fromInterned(ptr_info.child); + const src_ty: Type = .fromInterned(ptr_info.child); if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) return; const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8); @@ -8373,7 +8524,7 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In const limb_mem: Memory = .{ .base = .{ .reg = ptr_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(limb_abi_size), + .size = 
.fromSize(limb_abi_size), .disp = src_byte_off + limb_i * limb_abi_size, } }, }; @@ -8563,10 +8714,10 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const index = extra.field_index; const container_ty = self.typeOf(operand); - const container_rc = self.regClassForType(container_ty); + const container_rc = self.regSetForType(container_ty); const field_ty = container_ty.fieldType(index, zcu); if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; - const field_rc = self.regClassForType(field_ty); + const field_rc = self.regSetForType(field_ty); const field_is_gp = field_rc.supersetOf(abi.RegisterClass.gp); const src_mcv = try self.resolveInst(operand); @@ -8624,7 +8775,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const dst_regs: [2]Register = if (field_rc.supersetOf(container_rc) and self.reuseOperand(inst, operand, 0, src_mcv)) src_regs else dst: { const dst_regs = - try self.register_manager.allocRegs(2, .{null} ** 2, field_rc); + try self.register_manager.allocRegs(2, @splat(null), field_rc); const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); @@ -8786,7 +8937,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory(.{ ._, .mov }, load_reg, .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(load_abi_size), + .size = .fromSize(load_abi_size), .disp = frame_addr.off + field_byte_off, } }, }); @@ -8807,7 +8958,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(field_abi_size), + .size = .fromSize(field_abi_size), .disp = frame_addr.off + field_byte_off, } }, }, @@ -8815,7 +8966,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg, .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(field_abi_size), + .size = .fromSize(field_abi_size), .disp = frame_addr.off + field_byte_off + limb_abi_size, } }, }); @@ -8977,12 +9128,12 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MC try self.genSetReg(addr_reg, Type.usize, dst_mcv.address(), .{}); try self.asmMemory(mir_tag, .{ .base = .{ .reg = addr_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), } } }); }, .indirect, .load_frame => try self.asmMemory( mir_tag, - try dst_mcv.mem(self, Memory.Size.fromSize(abi_size)), + try dst_mcv.mem(self, .fromSize(abi_size)), ), } } @@ -9024,7 +9175,7 @@ fn genShiftBinOpMir( }; defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock); - const temp_regs = try self.register_manager.allocRegs(4, .{null} ** 4, abi.RegisterClass.gp); + const temp_regs = try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs); defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); @@ -9576,7 +9727,7 @@ fn genShiftBinOpMir( .memory => |addr| .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{ @tagName(lhs_mcv), @@ -9587,14 +9738,14 @@ fn genShiftBinOpMir( .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, 
.mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), .disp = reg_off.off, } }, }, .load_frame => |frame_addr| .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), .disp = frame_addr.off, } }, }, @@ -9792,7 +9943,7 @@ fn genMulDivBinOp( ); const temp_regs = - try self.register_manager.allocRegs(4, .{null} ** 4, abi.RegisterClass.gp); + try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs); defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); @@ -10208,7 +10359,7 @@ fn genBinOp( mir_tag, dst_reg, dst_reg, - try src_mcv.mem(self, Memory.Size.fromBitSize(float_bits)), + try src_mcv.mem(self, .fromBitSize(float_bits)), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_reg, @@ -10227,7 +10378,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_reg, - try src_mcv.mem(self, Memory.Size.fromBitSize(float_bits)), + try src_mcv.mem(self, .fromBitSize(float_bits)), ) else try self.asmRegisterRegister( mir_tag, dst_reg, @@ -10358,14 +10509,14 @@ fn genBinOp( const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs); break :locks .{ locks[0], locks[1] }; }, - else => .{null} ** 2, + else => @splat(null), }; defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock); const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) { .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null }, .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs), - else => .{null} ** 2, + else => @splat(null), }; defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock); @@ -10398,7 +10549,7 @@ fn genBinOp( const dst_locks: [2]?RegisterLock = switch (dst_mcv) { .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null }, .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs), - else => .{null} ** 2, + else => @splat(null), }; defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); @@ -10414,7 +10565,7 @@ fn genBinOp( const src_locks: [2]?RegisterLock = switch (src_mcv) { .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null }, .register_pair => |src_regs| self.register_manager.lockRegs(2, src_regs), - else => .{null} ** 2, + else => @splat(null), }; defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock); @@ -10472,11 +10623,7 @@ fn genBinOp( const dst_regs = switch (dst_mcv) { .register_pair => |dst_regs| dst_regs, else => dst: { - const dst_regs = try self.register_manager.allocRegs( - 2, - .{null} ** 2, - abi.RegisterClass.gp, - ); + const dst_regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); const dst_regs_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); defer for (dst_regs_locks) |lock| self.register_manager.unlockReg(lock); @@ -10624,21 +10771,21 @@ fn genBinOp( .memory => |addr| .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(cmov_abi_size), + .size = .fromSize(cmov_abi_size), .disp = @intCast(@as(i64, @bitCast(addr))), } }, }, .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(cmov_abi_size), + .size = .fromSize(cmov_abi_size), .disp = reg_off.off, } }, }, .load_frame => |frame_addr| .{ .base = 
.{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(cmov_abi_size), + .size = .fromSize(cmov_abi_size), .disp = frame_addr.off, } }, }, @@ -11433,8 +11580,8 @@ fn genBinOp( dst_reg, lhs_reg, try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(zcu)) { - else => Memory.Size.fromSize(abi_size), - .vector => Memory.Size.fromBitSize(dst_reg.bitSize()), + else => .fromSize(abi_size), + .vector => .fromBitSize(dst_reg.bitSize()), }), ) else try self.asmRegisterRegisterRegister( mir_tag, @@ -11451,8 +11598,8 @@ fn genBinOp( mir_tag, dst_reg, try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(zcu)) { - else => Memory.Size.fromSize(abi_size), - .vector => Memory.Size.fromBitSize(dst_reg.bitSize()), + else => .fromSize(abi_size), + .vector => .fromBitSize(dst_reg.bitSize()), }), ) else try self.asmRegisterRegister( mir_tag, @@ -11479,8 +11626,8 @@ fn genBinOp( dst_reg, lhs_reg, try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(zcu)) { - else => Memory.Size.fromSize(abi_size), - .vector => Memory.Size.fromBitSize(dst_reg.bitSize()), + else => .fromSize(abi_size), + .vector => .fromBitSize(dst_reg.bitSize()), }), imm, ) else try self.asmRegisterRegisterRegisterImmediate( @@ -11499,8 +11646,8 @@ fn genBinOp( mir_tag, dst_reg, try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(zcu)) { - else => Memory.Size.fromSize(abi_size), - .vector => Memory.Size.fromBitSize(dst_reg.bitSize()), + else => .fromSize(abi_size), + .vector => .fromBitSize(dst_reg.bitSize()), }), imm, ) else try self.asmRegisterRegisterImmediate( @@ -11707,11 +11854,11 @@ fn genBinOp( const unsigned_ty = try lhs_ty.toUnsigned(pt); const not_mcv = try self.genTypedValue(try unsigned_ty.maxInt(pt, unsigned_ty)); const not_mem: Memory = if (not_mcv.isMemory()) - try not_mcv.mem(self, Memory.Size.fromSize(abi_size)) + try not_mcv.mem(self, .fromSize(abi_size)) else .{ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()), - }, .mod = .{ .rm = .{ .size = Memory.Size.fromSize(abi_size) } } }; + }, .mod = .{ .rm = .{ .size = .fromSize(abi_size) } } }; switch (mir_tag[0]) { .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory( .{ .vp_, .xor }, @@ -11891,21 +12038,21 @@ fn genBinOpMir( .memory => |addr| .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(limb_abi_size), + .size = .fromSize(limb_abi_size), .disp = std.math.cast(i32, addr + off) orelse break :direct, } }, }, .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(limb_abi_size), + .size = .fromSize(limb_abi_size), .disp = reg_off.off + off, } }, }, .load_frame => |frame_addr| .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(limb_abi_size), + .size = .fromSize(limb_abi_size), .disp = frame_addr.off + off, } }, }, @@ -12054,21 +12201,21 @@ fn genBinOpMir( => .{ .base = .{ .reg = dst_info.?.addr_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(limb_abi_size), + .size = .fromSize(limb_abi_size), .disp = off, } }, }, .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(limb_abi_size), + .size = .fromSize(limb_abi_size), .disp = reg_off.off + off, } }, }, .load_frame => |frame_addr| .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(limb_abi_size), + .size = .fromSize(limb_abi_size), .disp = frame_addr.off + off, } }, }, @@ -12277,7 +12424,7 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, 
src_mcv: M .memory => |addr| .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse return self.asmRegisterRegister( .{ .i_, .mul }, @@ -12292,14 +12439,14 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), .disp = reg_off.off, } }, }, .load_frame => |frame_addr| .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), .disp = frame_addr.off, } }, }, @@ -12475,7 +12622,7 @@ fn genLocalDebugInfo( self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op.operand, ), }; - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ty, self.pt.zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(ty, self.pt.zcu)); try self.genSetMem(.{ .frame = frame_index }, 0, ty, mcv, .{}); try self.asmAirMemory(.dbg_local, inst, .{ .base = .{ .frame = frame_index }, @@ -12602,7 +12749,7 @@ fn genCall(self: *Self, info: union(enum) { const frame_indices = try allocator.alloc(FrameIndex, args.len); defer allocator.free(frame_indices); - var reg_locks = std.ArrayList(?RegisterLock).init(allocator); + var reg_locks: std.ArrayList(?RegisterLock) = .init(allocator); defer reg_locks.deinit(); try reg_locks.ensureTotalCapacity(16); defer for (reg_locks.items) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); @@ -12612,7 +12759,7 @@ fn genCall(self: *Self, info: union(enum) { // We need a properly aligned and sized call frame to be able to call this function. 
{ - const needed_call_frame = FrameAlloc.init(.{ + const needed_call_frame: FrameAlloc = .init(.{ .size = call_info.stack_byte_count, .alignment = call_info.stack_align, }); @@ -12647,7 +12794,7 @@ fn genCall(self: *Self, info: union(enum) { try reg_locks.appendSlice(&self.register_manager.lockRegs(2, regs)); }, .indirect => |reg_off| { - frame_index.* = try self.allocFrameIndex(FrameAlloc.initType(arg_ty, zcu)); + frame_index.* = try self.allocFrameIndex(.initType(arg_ty, zcu)); try self.genSetMem(.{ .frame = frame_index.* }, 0, arg_ty, src_arg, .{}); try self.register_manager.getReg(reg_off.reg, null); try reg_locks.append(self.register_manager.lockReg(reg_off.reg)); @@ -12718,8 +12865,8 @@ fn genCall(self: *Self, info: union(enum) { switch (call_info.return_value.long) { .none, .unreach => {}, .indirect => |reg_off| { - const ret_ty = Type.fromInterned(fn_info.return_type); - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(ret_ty, zcu)); + const ret_ty: Type = .fromInterned(fn_info.return_type); + const frame_index = try self.allocFrameIndex(.initSpill(ret_ty, zcu)); try self.genSetReg(reg_off.reg, Type.usize, .{ .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, }, .{}); @@ -12936,7 +13083,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void self.register_manager.lockRegAssumeUnused(lhs_ro.reg), null, }, - else => .{null} ** 2, + else => @splat(null), }; defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock); @@ -12945,7 +13092,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null }, .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs), .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null }, - else => .{null} ** 2, + else => @splat(null), }; defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock); @@ -13080,7 +13227,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - break :result Condition.fromCompareOperator( + break :result .fromCompareOperator( if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned, result_op: { const flipped_op = if (flipped) op.reverse() else op; @@ -13212,11 +13359,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock); - const regs = try self.register_manager.allocRegs( - 2, - .{null} ** 2, - abi.RegisterClass.gp, - ); + const regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); const acc_reg = regs[0].to64(); const locks = self.register_manager.lockRegsAssumeUnused(2, regs); defer for (locks) |lock| self.register_manager.unlockReg(lock); @@ -13367,7 +13510,7 @@ fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void { const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data; const dst_mcv = try self.genBinOp( inst, - Air.Inst.Tag.fromCmpOp(extra.compareOperator(), false), + .fromCmpOp(extra.compareOperator(), false), extra.lhs, extra.rhs, ); @@ -13399,7 +13542,7 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { registerAlias(dst_reg, op_abi_size), .{ .base = .{ .reg = addr_reg }, - .mod = .{ .rm = .{ .size = 
Memory.Size.fromSize(op_abi_size) } }, + .mod = .{ .rm = .{ .size = .fromSize(op_abi_size) } }, }, ); @@ -13682,7 +13825,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC .{ .base = .{ .reg = addr_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(some_abi_size), + .size = .fromSize(some_abi_size), .disp = some_info.off, } }, }, @@ -13699,14 +13842,14 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(some_abi_size), + .size = .fromSize(some_abi_size), .disp = reg_off.off + some_info.off, } }, }, .load_frame => |frame_addr| .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(some_abi_size), + .size = .fromSize(some_abi_size), .disp = frame_addr.off + some_info.off, } }, }, @@ -13745,7 +13888,7 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) .{ .base = .{ .reg = ptr_reg }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(some_abi_size), + .size = .fromSize(some_abi_size), .disp = some_info.off, } }, }, @@ -13968,7 +14111,7 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void { fn lowerBlock(self: *Self, inst: Air.Inst.Index, body: []const Air.Inst.Index) !void { // A block is a setup to be able to jump to the end. const inst_tracking_i = self.inst_tracking.count(); - self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(.unreach)); + self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(.unreach)); self.scope_generation += 1; try self.blocks.putNoClobber(self.gpa, inst, .{ .state = self.initRetroactiveState() }); @@ -14140,7 +14283,7 @@ fn airLoopSwitchBr(self: *Self, inst: Air.Inst.Index) !void { try self.genCopy(self.typeOf(switch_br.operand), mat_cond, condition, .{}); break :mat_cond mat_cond; }; - self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(mat_cond)); + self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(mat_cond)); // If the condition dies here in this switch instruction, process // that death now instead of later as this has an effect on @@ -14265,7 +14408,7 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void { } if (first_br) { - block_tracking.* = InstTracking.init(block_result); + block_tracking.* = .init(block_result); try self.saveRetroactiveState(&block_data.state); } else try self.restoreState(block_data.state, &.{}, .{ .emit_instructions = true, @@ -14309,7 +14452,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { extra_i += inputs.len; var result: MCValue = .none; - var args = std.ArrayList(MCValue).init(self.gpa); + var args: std.ArrayList(MCValue) = .init(self.gpa); try args.ensureTotalCapacity(outputs.len + inputs.len); defer { for (args.items) |arg| if (arg.getReg()) |reg| self.register_manager.unlockReg(.{ @@ -14317,7 +14460,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { }); args.deinit(); } - var arg_map = std.StringHashMap(u8).init(self.gpa); + var arg_map: std.StringHashMap(u8) = .init(self.gpa); try arg_map.ensureTotalCapacity(@intCast(outputs.len + inputs.len)); defer arg_map.deinit(); @@ -14645,14 +14788,8 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { }); }; - const Operand = union(enum) { - none, - reg: Register, - mem: Memory, - imm: Immediate, - inst: Mir.Inst.Index, - }; - var ops: [4]Operand = .{.none} ** 4; + var ops: [4]Operand = @splat(.none); + var ops_len: usize = 0; var last_op = false; var op_it = 
std.mem.splitScalar(u8, mnem_it.rest(), ','); @@ -14847,8 +14984,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { try pending_relocs.append(self.gpa, @intCast(self.mir_instructions.len)); op.* = .{ .inst = label_gop.value_ptr.target }; } else return self.fail("invalid operand: '{s}'", .{op_str}); + ops_len += 1; } else if (op_it.next()) |op_str| return self.fail("extra operand: '{s}'", .{op_str}); + // convert from att syntax to intel syntax + std.mem.reverse(Operand, ops[0..ops_len]); + (if (prefix == .directive) switch (mnem_tag) { .@".cfi_def_cfa" => if (ops[0] == .reg and ops[1] == .imm and ops[2] == .none) self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, ops[0].reg, ops[1].imm) @@ -14904,68 +15045,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { error.InvalidInstruction, .@".cfi_escape" => error.InvalidInstruction, else => unreachable, - } else switch (ops[0]) { - .none => self.asmOpOnly(mnem_fixed_tag), - .reg => |reg0| switch (ops[1]) { - .none => self.asmRegister(mnem_fixed_tag, reg0), - .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterRegister(mnem_fixed_tag, reg1, reg0), - .reg => |reg2| switch (ops[3]) { - .none => self.asmRegisterRegisterRegister(mnem_fixed_tag, reg2, reg1, reg0), - else => error.InvalidInstruction, - }, - .mem => |mem2| switch (ops[3]) { - .none => self.asmMemoryRegisterRegister(mnem_fixed_tag, mem2, reg1, reg0), - else => error.InvalidInstruction, - }, - else => error.InvalidInstruction, - }, - .mem => |mem1| switch (ops[2]) { - .none => self.asmMemoryRegister(mnem_fixed_tag, mem1, reg0), - else => error.InvalidInstruction, - }, - else => error.InvalidInstruction, - }, - .mem => |mem0| switch (ops[1]) { - .none => self.asmMemory(mnem_fixed_tag, mem0), - .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterMemory(mnem_fixed_tag, reg1, mem0), - else => error.InvalidInstruction, - }, - else => error.InvalidInstruction, - }, - .imm => |imm0| switch (ops[1]) { - .none => self.asmImmediate(mnem_fixed_tag, imm0), - .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterImmediate(mnem_fixed_tag, reg1, imm0), - .reg => |reg2| switch (ops[3]) { - .none => self.asmRegisterRegisterImmediate(mnem_fixed_tag, reg2, reg1, imm0), - .reg => |reg3| self.asmRegisterRegisterRegisterImmediate( - mnem_fixed_tag, - reg3, - reg2, - reg1, - imm0, - ), - else => error.InvalidInstruction, - }, - .mem => |mem2| switch (ops[3]) { - .none => self.asmMemoryRegisterImmediate(mnem_fixed_tag, mem2, reg1, imm0), - else => error.InvalidInstruction, - }, - else => error.InvalidInstruction, - }, - .mem => |mem1| switch (ops[2]) { - .none => self.asmMemoryImmediate(mnem_fixed_tag, mem1, imm0), - else => error.InvalidInstruction, - }, - else => error.InvalidInstruction, - }, - .inst => |inst0| switch (ops[1]) { - .none => self.asmReloc(mnem_fixed_tag, inst0), - else => error.InvalidInstruction, - }, - }) catch |err| switch (err) { + } else self.asmOps(mnem_fixed_tag, ops)) catch |err| switch (err) { error.InvalidInstruction => return self.fail( "invalid instruction: '{s} {s} {s} {s} {s}'", .{ @@ -15000,7 +15080,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } simple: { - var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1); + var buf: [Liveness.bpi - 1]Air.Inst.Ref = @splat(.none); var buf_index: usize = 0; for (outputs) |output| { if (output == .none) continue; @@ -15583,7 +15663,7 @@ fn genSetReg( .load_frame => |frame_addr| try self.moveStrategy( ty, dst_reg.class(), - self.getFrameAddrAlignment(frame_addr).compare(.gte, 
InternPool.Alignment.fromLog2Units( + self.getFrameAddrAlignment(frame_addr).compare(.gte, .fromLog2Units( std.math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)), )), ), @@ -15742,7 +15822,7 @@ fn genSetMem( try self.asmMemoryImmediate( .{ ._, .mov }, .{ .base = base, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), .disp = disp, } } }, immediate, @@ -15753,7 +15833,7 @@ fn genSetMem( try self.asmMemoryImmediate( .{ ._, .mov }, .{ .base = base, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(abi_size), + .size = .fromSize(abi_size), .disp = disp, } } }, .s(small), @@ -15796,11 +15876,11 @@ fn genSetMem( .general_purpose, .segment, .x87, .ip => @divExact(src_alias.bitSize(), 8), .mmx, .sse => abi_size, }); - const src_align = InternPool.Alignment.fromNonzeroByteUnits( + const src_align: InternPool.Alignment = .fromNonzeroByteUnits( std.math.ceilPowerOfTwoAssert(u32, src_size), ); if (src_size > mem_size) { - const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{ + const frame_index = try self.allocFrameIndex(.init(.{ .size = src_size, .alignment = src_align, })); @@ -15808,7 +15888,7 @@ fn genSetMem( try (try self.moveStrategy(ty, src_alias.class(), true)).write( self, .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ - .size = Memory.Size.fromSize(src_size), + .size = .fromSize(src_size), } } }, src_alias, ); @@ -15828,7 +15908,7 @@ fn genSetMem( })).write( self, .{ .base = base, .mod = .{ .rm = .{ - .size = Memory.Size.fromBitSize(@min(self.memSize(ty).bitSize(), src_alias.bitSize())), + .size = .fromBitSize(@min(self.memSize(ty).bitSize(), src_alias.bitSize())), .disp = disp, } } }, src_alias, @@ -16037,7 +16117,7 @@ fn genLazySymbolRef( .mov => try self.asmRegisterMemory( .{ ._, tag }, reg.to64(), - Memory.initSib(.qword, .{ .base = .{ .reg = reg.to64() } }), + .initSib(.qword, .{ .base = .{ .reg = reg.to64() } }), ), else => unreachable, } @@ -16108,8 +16188,8 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { else => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv, }; - const dst_rc = self.regClassForType(dst_ty); - const src_rc = self.regClassForType(src_ty); + const dst_rc = self.regSetForType(dst_ty); + const src_rc = self.regSetForType(src_ty); const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); @@ -16170,7 +16250,7 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { const array_ty = ptr_ty.childType(zcu); const array_len = array_ty.arrayLen(zcu); - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu)); try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr, .{}); try self.genSetMem( .{ .frame = frame_index }, @@ -16236,7 +16316,7 @@ fn airFloatFromInt(self: *Self, inst: Air.Inst.Index) !void { if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg); - const dst_reg = try self.register_manager.allocReg(inst, self.regClassForType(dst_ty)); + const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty)); const dst_mcv = MCValue{ .register = dst_reg }; const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); @@ -16314,7 +16394,7 @@ fn airIntFromFloat(self: *Self, inst: Air.Inst.Index) !void { const src_lock = 
self.register_manager.lockRegAssumeUnused(src_reg); defer self.register_manager.unlockReg(src_lock); - const dst_reg = try self.register_manager.allocReg(inst, self.regClassForType(dst_ty)); + const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty)); const dst_mcv = MCValue{ .register = dst_reg }; const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); @@ -16381,7 +16461,7 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { defer if (new_lock) |lock| self.register_manager.unlockReg(lock); const ptr_mcv = try self.resolveInst(extra.ptr); - const mem_size = Memory.Size.fromSize(val_abi_size); + const mem_size: Memory.Size = .fromSize(val_abi_size); const ptr_mem: Memory = switch (ptr_mcv) { .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size), else => .{ @@ -16448,7 +16528,7 @@ fn atomicOp( defer if (val_lock) |lock| self.register_manager.unlockReg(lock); const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu)); - const mem_size = Memory.Size.fromSize(val_abi_size); + const mem_size: Memory.Size = .fromSize(val_abi_size); const ptr_mem: Memory = switch (ptr_mcv) { .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size), else => .{ @@ -16637,7 +16717,7 @@ fn atomicOp( try self.asmCmovccRegisterMemory( cc, registerAlias(tmp_reg, cmov_abi_size), - try val_mcv.mem(self, Memory.Size.fromSize(cmov_abi_size)), + try val_mcv.mem(self, .fromSize(cmov_abi_size)), ); }, else => { @@ -17069,7 +17149,7 @@ fn airTagName(self: *Self, inst: Air.Inst.Index) !void { // We need a properly aligned and sized call frame to be able to call this function. { - const needed_call_frame = FrameAlloc.init(.{ + const needed_call_frame: FrameAlloc = .init(.{ .size = inst_ty.abiSize(zcu), .alignment = inst_ty.abiAlignment(zcu), }); @@ -17207,7 +17287,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const vector_ty = self.typeOfIndex(inst); const vector_len = vector_ty.vectorLen(zcu); - const dst_rc = self.regClassForType(vector_ty); + const dst_rc = self.regSetForType(vector_ty); const scalar_ty = self.typeOf(ty_op.operand); const result: MCValue = result: { @@ -17675,7 +17755,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { .ty = mask_elem_ty.toIntern(), .storage = .{ .u64 = bit / elem_bits }, } }); - const mask_mcv = try self.genTypedValue(Value.fromInterned(try pt.intern(.{ .aggregate = .{ + const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ .ty = mask_ty.toIntern(), .storage = .{ .elems = mask_elems[0..vec_len] }, } }))); @@ -17700,7 +17780,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { .ty = mask_elem_ty.toIntern(), .storage = .{ .u64 = @as(u32, 1) << @intCast(bit & (elem_bits - 1)) }, } }); - const mask_mcv = try self.genTypedValue(Value.fromInterned(try pt.intern(.{ .aggregate = .{ + const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ .ty = mask_ty.toIntern(), .storage = .{ .elems = mask_elems[0..vec_len] }, } }))); @@ -17853,7 +17933,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { if (rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ mir_fixes, .andn }, mask_alias, - try rhs_mcv.mem(self, Memory.Size.fromSize(abi_size)), + try rhs_mcv.mem(self, .fromSize(abi_size)), ) else try self.asmRegisterRegister( .{ mir_fixes, .andn }, mask_alias, @@ 
-17934,7 +18014,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { else self.hasFeature(.avx2)) 32 else 16)) break :unpck; - var sources = [1]?u1{null} ** 2; + var sources: [2]?u1 = @splat(null); for (mask_elems, 0..) |maybe_mask_elem, elem_index| { const mask_elem = maybe_mask_elem orelse continue; const mask_elem_index = @@ -17994,7 +18074,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { mir_tag, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + try rhs_mcv.mem(self, .fromSize(max_abi_size)), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_alias, @@ -18006,7 +18086,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_alias, - try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + try rhs_mcv.mem(self, .fromSize(max_abi_size)), ) else try self.asmRegisterRegister( mir_tag, dst_alias, @@ -18023,7 +18103,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :pshufd; var control: u8 = 0b00_00_00_00; - var sources = [1]?u1{null} ** 1; + var sources: [1]?u1 = @splat(null); for (mask_elems, 0..) |maybe_mask_elem, elem_index| { const mask_elem = maybe_mask_elem orelse continue; const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); @@ -18055,7 +18135,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ if (has_avx) .vp_d else .p_d, .shuf }, dst_alias, - try src_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + try src_mcv.mem(self, .fromSize(max_abi_size)), .u(control), ) else try self.asmRegisterRegisterImmediate( .{ if (has_avx) .vp_d else .p_d, .shuf }, @@ -18074,7 +18154,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufps; var control: u8 = 0b00_00_00_00; - var sources = [1]?u1{null} ** 2; + var sources: [2]?u1 = @splat(null); for (mask_elems, 0..) |maybe_mask_elem, elem_index| { const mask_elem = maybe_mask_elem orelse continue; const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); @@ -18112,7 +18192,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { .{ .v_ps, .shuf }, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + try rhs_mcv.mem(self, .fromSize(max_abi_size)), .u(control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .v_ps, .shuf }, @@ -18126,7 +18206,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ ._ps, .shuf }, dst_alias, - try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + try rhs_mcv.mem(self, .fromSize(max_abi_size)), .u(control), ) else try self.asmRegisterRegisterImmediate( .{ ._ps, .shuf }, @@ -18145,7 +18225,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufpd; var control: u4 = 0b0_0_0_0; - var sources = [1]?u1{null} ** 2; + var sources: [2]?u1 = @splat(null); for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { const mask_elem = maybe_mask_elem orelse continue; const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); @@ -18179,7 +18259,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { .{ .v_pd, .shuf }, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + try rhs_mcv.mem(self, .fromSize(max_abi_size)), .u(control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .v_pd, .shuf }, @@ -18193,7 +18273,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ ._pd, .shuf }, dst_alias, - try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + try rhs_mcv.mem(self, .fromSize(max_abi_size)), .u(control), ) else try self.asmRegisterRegisterImmediate( .{ ._pd, .shuf }, @@ -18249,7 +18329,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { .{ .vp_d, .blend }, registerAlias(dst_reg, dst_abi_size), registerAlias(lhs_reg, dst_abi_size), - try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .fromSize(dst_abi_size)), .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .vp_d, .blend }, @@ -18304,7 +18384,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { lhs_mcv.getReg().? else dst_reg, dst_abi_size), - try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .fromSize(dst_abi_size)), .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .vp_w, .blend }, @@ -18321,7 +18401,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ .p_w, .blend }, registerAlias(dst_reg, dst_abi_size), - try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .fromSize(dst_abi_size)), .u(expanded_control), ) else try self.asmRegisterRegisterImmediate( .{ .p_w, .blend }, @@ -18365,7 +18445,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { lhs_mcv.getReg().? 
else dst_reg, dst_abi_size), - try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .fromSize(dst_abi_size)), .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( switch (elem_abi_size) { @@ -18390,7 +18470,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { else => unreachable, }, registerAlias(dst_reg, dst_abi_size), - try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .fromSize(dst_abi_size)), .u(expanded_control), ) else try self.asmRegisterRegisterImmediate( switch (elem_abi_size) { @@ -18435,7 +18515,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { else try select_mask_elem_ty.minIntScalar(pt, select_mask_elem_ty)).toIntern(); } - const select_mask_mcv = try self.genTypedValue(Value.fromInterned(try pt.intern(.{ .aggregate = .{ + const select_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ .ty = select_mask_ty.toIntern(), .storage = .{ .elems = select_mask_elems[0..mask_elems.len] }, } }))); @@ -18480,7 +18560,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { registerAlias(lhs_mcv.getReg().?, dst_abi_size) else dst_alias, - try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .fromSize(dst_abi_size)), select_mask_alias, ) else try self.asmRegisterRegisterRegisterRegister( mir_tag, @@ -18497,7 +18577,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryRegister( mir_tag, dst_alias, - try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .fromSize(dst_abi_size)), select_mask_alias, ) else try self.asmRegisterRegisterRegister( mir_tag, @@ -18540,7 +18620,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { if (lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ mir_fixes, .andn }, mask_alias, - try lhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + try lhs_mcv.mem(self, .fromSize(dst_abi_size)), ) else try self.asmRegisterRegister( .{ mir_fixes, .andn }, mask_alias, @@ -18583,13 +18663,13 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { } }); } const lhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type }); - const lhs_mask_mcv = try self.genTypedValue(Value.fromInterned(try pt.intern(.{ .aggregate = .{ + const lhs_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ .ty = lhs_mask_ty.toIntern(), .storage = .{ .elems = lhs_mask_elems[0..max_abi_size] }, } }))); const lhs_mask_mem: Memory = .{ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, lhs_mask_mcv.address()) }, - .mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } }, + .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } }, }; if (has_avx) try self.asmRegisterRegisterMemory( .{ .vp_b, .shuf }, @@ -18617,13 +18697,13 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { } }); } const rhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type }); - const rhs_mask_mcv = try self.genTypedValue(Value.fromInterned(try pt.intern(.{ .aggregate = .{ + const rhs_mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ .ty = rhs_mask_ty.toIntern(), .storage = .{ .elems = rhs_mask_elems[0..max_abi_size] }, } }))); const rhs_mask_mem: Memory = .{ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, rhs_mask_mcv.address()) }, - .mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } }, + .mod = 
.{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } }, }; if (has_avx) try self.asmRegisterRegisterMemory( .{ .vp_b, .shuf }, @@ -18696,7 +18776,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { .Or => { if (operand_mcv.isMemory()) try self.asmMemoryImmediate( .{ ._, .@"test" }, - try operand_mcv.mem(self, Memory.Size.fromSize(abi_size)), + try operand_mcv.mem(self, .fromSize(abi_size)), .u(mask), ) else { const operand_reg = registerAlias(if (operand_mcv.isRegister()) @@ -18745,7 +18825,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = result: { switch (result_ty.zigTypeTag(zcu)) { .@"struct" => { - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); if (result_ty.containerLayout(zcu) == .@"packed") { const struct_obj = zcu.typeToStruct(result_ty).?; try self.genInlineMemset( @@ -18885,7 +18965,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { } break :result .{ .register = dst_reg }; } else { - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); const elem_size: u32 = @intCast(elem_ty.abiSize(zcu)); for (elements, 0..) |elem, elem_i| { @@ -18918,7 +18998,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { }; if (elements.len <= Liveness.bpi - 1) { - var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1); + var buf: [Liveness.bpi - 1]Air.Inst.Ref = @splat(.none); @memcpy(buf[0..elements.len], elements); return self.finishAir(inst, result, buf); } @@ -18952,7 +19032,7 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void { const union_obj = zcu.typeToUnion(union_ty).?; const field_name = union_obj.loadTagType(ip).names.get(ip)[extra.field_index]; - const tag_ty = Type.fromInterned(union_obj.enum_tag_ty); + const tag_ty: Type = .fromInterned(union_obj.enum_tag_ty); const field_index = tag_ty.enumFieldIndex(field_name, zcu).?; const tag_val = try pt.enumValueFieldIndex(tag_ty, field_index); const tag_int_val = try tag_val.intFromEnum(tag_ty, pt); @@ -19010,10 +19090,10 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { } var mcvs: [3]MCValue = undefined; - var locks = [1]?RegisterManager.RegisterLock{null} ** 3; + var locks: [3]?RegisterManager.RegisterLock = @splat(null); defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); - var order = [1]u2{0} ** 3; - var unused = std.StaticBitSet(3).initFull(); + var order: [3]u2 = @splat(0); + var unused: std.StaticBitSet(3) = .initFull(); for (ops, &mcvs, &locks, 0..) 
|op, *mcv, *lock, op_i| { const op_index: u2 = @intCast(op_i); mcv.* = try self.resolveInst(op); @@ -19139,7 +19219,7 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { mir_tag, mop1_reg, mop2_reg, - try mops[2].mem(self, Memory.Size.fromSize(abi_size)), + try mops[2].mem(self, .fromSize(abi_size)), ); break :result mops[0]; }; @@ -19158,7 +19238,7 @@ fn airVaStart(self: *Self, inst: Air.Inst.Index) !void { )) { .x86_64_sysv => result: { const info = self.va_info.sysv; - const dst_fi = try self.allocFrameIndex(FrameAlloc.initSpill(va_list_ty, zcu)); + const dst_fi = try self.allocFrameIndex(.initSpill(va_list_ty, zcu)); var field_off: u31 = 0; // gp_offset: c_uint, try self.genSetMem( @@ -19221,7 +19301,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void { try self.spillEflagsIfOccupied(); const tmp_regs = - try self.register_manager.allocRegs(2, .{null} ** 2, abi.RegisterClass.gp); + try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); const offset_reg = tmp_regs[0].to32(); const addr_reg = tmp_regs[1].to64(); const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs); @@ -19426,8 +19506,8 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { } else mcv: { const ip_index = ref.toInterned().?; const gop = try self.const_tracking.getOrPut(self.gpa, ip_index); - if (!gop.found_existing) gop.value_ptr.* = InstTracking.init(init: { - const const_mcv = try self.genTypedValue(Value.fromInterned(ip_index)); + if (!gop.found_existing) gop.value_ptr.* = .init(init: { + const const_mcv = try self.genTypedValue(.fromInterned(ip_index)); switch (const_mcv) { .lea_tlv => |tlv_sym| switch (self.bin_file.tag) { .elf, .macho => { @@ -19436,7 +19516,7 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { } else { try self.spillRegisters(&.{.rax}); } - const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{ + const frame_index = try self.allocFrameIndex(.init(.{ .size = 8, .alignment = .@"8", })); @@ -19539,9 +19619,8 @@ fn resolveCallingConventionValues( const param_types = try self.gpa.alloc(Type, fn_info.param_types.len + var_args.len); defer self.gpa.free(param_types); - for (param_types[0..fn_info.param_types.len], fn_info.param_types.get(ip)) |*dest, src| { - dest.* = Type.fromInterned(src); - } + for (param_types[0..fn_info.param_types.len], fn_info.param_types.get(ip)) |*dest, src| + dest.* = .fromInterned(src); for (param_types[fn_info.param_types.len..], var_args) |*param_ty, arg_ty| param_ty.* = self.promoteVarArg(arg_ty); @@ -19556,13 +19635,13 @@ fn resolveCallingConventionValues( }; errdefer self.gpa.free(result.args); - const ret_ty = Type.fromInterned(fn_info.return_type); + const ret_ty: Type = .fromInterned(fn_info.return_type); const resolved_cc = abi.resolveCallingConvention(cc, self.target.*); switch (cc) { .naked => { assert(result.args.len == 0); - result.return_value = InstTracking.init(.unreach); + result.return_value = .init(.unreach); result.stack_align = .@"8"; }, .x86_64_sysv, .x86_64_win => |cc_opts| { @@ -19583,10 +19662,10 @@ fn resolveCallingConventionValues( // Return values if (ret_ty.zigTypeTag(zcu) == .noreturn) { - result.return_value = InstTracking.init(.unreach); + result.return_value = .init(.unreach); } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) { // TODO: is this even possible for C calling convention? 
- result.return_value = InstTracking.init(.none); + result.return_value = .init(.none); } else { var ret_tracking: [2]InstTracking = undefined; var ret_tracking_i: usize = 0; @@ -19604,7 +19683,7 @@ fn resolveCallingConventionValues( ); ret_int_reg_i += 1; - ret_tracking[ret_tracking_i] = InstTracking.init(.{ .register = ret_int_reg }); + ret_tracking[ret_tracking_i] = .init(.{ .register = ret_int_reg }); ret_tracking_i += 1; }, .sse, .float, .float_combine, .win_i128 => { @@ -19614,18 +19693,17 @@ fn resolveCallingConventionValues( ); ret_sse_reg_i += 1; - ret_tracking[ret_tracking_i] = InstTracking.init(.{ .register = ret_sse_reg }); + ret_tracking[ret_tracking_i] = .init(.{ .register = ret_sse_reg }); ret_tracking_i += 1; }, .sseup => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .sse), .x87 => { - ret_tracking[ret_tracking_i] = InstTracking.init(.{ .register = .st0 }); + ret_tracking[ret_tracking_i] = .init(.{ .register = .st0 }); ret_tracking_i += 1; }, .x87up => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .x87), .complex_x87 => { - ret_tracking[ret_tracking_i] = - InstTracking.init(.{ .register_pair = .{ .st0, .st1 } }); + ret_tracking[ret_tracking_i] = .init(.{ .register_pair = .{ .st0, .st1 } }); ret_tracking_i += 1; }, .memory => { @@ -19645,8 +19723,9 @@ fn resolveCallingConventionValues( result.return_value = switch (ret_tracking_i) { else => unreachable, 1 => ret_tracking[0], - 2 => InstTracking.init(.{ .register_pair = .{ - ret_tracking[0].short.register, ret_tracking[1].short.register, + 2 => .init(.{ .register_pair = .{ + ret_tracking[0].short.register, + ret_tracking[1].short.register, } }), }; } @@ -19769,9 +19848,9 @@ fn resolveCallingConventionValues( // Return values if (ret_ty.zigTypeTag(zcu) == .noreturn) { - result.return_value = InstTracking.init(.unreach); + result.return_value = .init(.unreach); } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) { - result.return_value = InstTracking.init(.none); + result.return_value = .init(.none); } else { const ret_reg = abi.getCAbiIntReturnRegs(resolved_cc)[0]; const ret_ty_size: u31 = @intCast(ret_ty.abiSize(zcu)); @@ -19884,8 +19963,8 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { fn memSize(self: *Self, ty: Type) Memory.Size { const zcu = self.pt.zcu; return switch (ty.zigTypeTag(zcu)) { - .float => Memory.Size.fromBitSize(ty.floatBits(self.target.*)), - else => Memory.Size.fromSize(@intCast(ty.abiSize(zcu))), + .float => .fromBitSize(ty.floatBits(self.target.*)), + else => .fromSize(@intCast(ty.abiSize(zcu))), }; } @@ -20140,7 +20219,7 @@ const Temp = struct { .register => |reg| { const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg.to64() }, .mod = .{ .rm = .{ @@ -20152,7 +20231,7 @@ const Temp = struct { .register_offset => |reg_off| { const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg_off.reg.to64() }, .mod = .{ .rm = .{ @@ -20161,14 +20240,14 @@ const Temp = struct { } }, }); }, - .lea_symbol => |sym_off| 
new_temp_index.tracking(self).* = InstTracking.init(.{ .lea_symbol = .{ + .lea_symbol => |sym_off| new_temp_index.tracking(self).* = .init(.{ .lea_symbol = .{ .sym_index = sym_off.sym_index, .off = sym_off.off + off, } }), .load_frame => |frame_addr| { const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register_offset = .{ + new_temp_index.tracking(self).* = .init(.{ .register_offset = .{ .reg = new_reg, .off = off, } }); @@ -20180,7 +20259,7 @@ const Temp = struct { } }, }); }, - .lea_frame => |frame_addr| new_temp_index.tracking(self).* = InstTracking.init(.{ .lea_frame = .{ + .lea_frame => |frame_addr| new_temp_index.tracking(self).* = .init(.{ .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off, } }), @@ -20198,7 +20277,7 @@ const Temp = struct { else => {}, .register => |reg| { try self.freeValue(temp_tracking.long); - temp_tracking.* = InstTracking.init(.{ .register_offset = .{ + temp_tracking.* = .init(.{ .register_offset = .{ .reg = reg, .off = off, } }); @@ -20206,7 +20285,7 @@ const Temp = struct { }, .register_offset => |reg_off| { try self.freeValue(temp_tracking.long); - temp_tracking.* = InstTracking.init(.{ .register_offset = .{ + temp_tracking.* = .init(.{ .register_offset = .{ .reg = reg_off.reg, .off = reg_off.off + off, } }); @@ -20214,7 +20293,7 @@ const Temp = struct { }, .lea_symbol => |sym_off| { assert(std.meta.eql(temp_tracking.long.lea_symbol, sym_off)); - temp_tracking.* = InstTracking.init(.{ .lea_symbol = .{ + temp_tracking.* = .init(.{ .lea_symbol = .{ .sym_index = sym_off.sym_index, .off = sym_off.off + off, } }); @@ -20222,7 +20301,7 @@ const Temp = struct { }, .lea_frame => |frame_addr| { assert(std.meta.eql(temp_tracking.long.lea_frame, frame_addr)); - temp_tracking.* = InstTracking.init(.{ .lea_frame = .{ + temp_tracking.* = .init(.{ .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off, } }); @@ -20243,26 +20322,26 @@ const Temp = struct { else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), .immediate => |imm| { assert(limb_index == 0); - new_temp_index.tracking(self).* = InstTracking.init(.{ .immediate = imm }); + new_temp_index.tracking(self).* = .init(.{ .immediate = imm }); }, .register => |reg| { assert(limb_index == 0); const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try self.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), reg.to64()); }, .register_pair => |regs| { const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try self.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), regs[limb_index].to64()); }, .register_offset => |reg_off| { assert(limb_index == 0); const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg_off.reg.to64() }, .mod = .{ .rm = .{ @@ -20274,7 +20353,7 @@ const Temp = struct { .load_symbol => |sym_off| 
{ const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try self.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ .base = .{ .reloc = sym_off.sym_index }, .mod = .{ .rm = .{ @@ -20285,12 +20364,12 @@ const Temp = struct { }, .lea_symbol => |sym_off| { assert(limb_index == 0); - new_temp_index.tracking(self).* = InstTracking.init(.{ .lea_symbol = sym_off }); + new_temp_index.tracking(self).* = .init(.{ .lea_symbol = sym_off }); }, .load_frame => |frame_addr| { const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try self.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ @@ -20301,7 +20380,7 @@ const Temp = struct { }, .lea_frame => |frame_addr| { assert(limb_index == 0); - new_temp_index.tracking(self).* = InstTracking.init(.{ .lea_frame = frame_addr }); + new_temp_index.tracking(self).* = .init(.{ .lea_frame = frame_addr }); }, } self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); @@ -20328,13 +20407,13 @@ const Temp = struct { } for (regs, 0..) |reg, reg_index| if (reg_index != limb_index) self.register_manager.freeReg(reg); - temp_tracking.* = InstTracking.init(.{ .register = regs[limb_index] }); + temp_tracking.* = .init(.{ .register = regs[limb_index] }); self.temp_type[@intFromEnum(temp_index)] = Type.usize; return; }, .load_symbol => |sym_off| { assert(std.meta.eql(temp_tracking.long.load_symbol, sym_off)); - temp_tracking.* = InstTracking.init(.{ .load_symbol = .{ + temp_tracking.* = .init(.{ .load_symbol = .{ .sym_index = sym_off.sym_index, .off = sym_off.off + @as(u31, limb_index) * 8, } }); @@ -20343,7 +20422,7 @@ const Temp = struct { }, .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) { assert(std.meta.eql(temp_tracking.long.load_frame, frame_addr)); - temp_tracking.* = InstTracking.init(.{ .load_frame = .{ + temp_tracking.* = .init(.{ .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + @as(u31, limb_index) * 8, } }); @@ -20371,7 +20450,7 @@ const Temp = struct { const new_temp_index = self.next_temp_index; self.temp_type[@intFromEnum(new_temp_index)] = ty; try self.genSetReg(new_reg, ty, val, .{}); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try temp.die(self); self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); temp.* = .{ .index = new_temp_index.toIndex() }; @@ -20390,9 +20469,32 @@ const Temp = struct { const new_temp_index = self.next_temp_index; self.temp_type[@intFromEnum(new_temp_index)] = ty; const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), self.regClassForType(ty)); + try self.register_manager.allocReg(new_temp_index.toIndex(), self.regSetForType(ty)); try self.genSetReg(new_reg, ty, val, .{}); - new_temp_index.tracking(self).* = InstTracking.init(.{ .register = new_reg }); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); + try temp.die(self); + self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + temp.* = .{ .index = new_temp_index.toIndex() }; + return true; + } + + fn 
toRegClass(temp: *Temp, rc: Register.Class, self: *Self) !bool { + const val, const ty = switch (temp.unwrap(self)) { + .ref => |ref| .{ temp.tracking(self).short, self.typeOf(ref) }, + .temp => |temp_index| val: { + const temp_tracking = temp_index.tracking(self); + switch (temp_tracking.short) { + else => {}, + .register => |reg| if (reg.class() == rc) return false, + } + break :val .{ temp_tracking.short, temp_index.typeOf(self) }; + }, + }; + const new_temp_index = self.next_temp_index; + self.temp_type[@intFromEnum(new_temp_index)] = ty; + const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), regSetForRegClass(rc)); + try self.genSetReg(new_reg, ty, val, .{}); + new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); try temp.die(self); self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); temp.* = .{ .index = new_temp_index.toIndex() }; @@ -20414,7 +20516,7 @@ const Temp = struct { assert(self.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking)); assert(self.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking)); self.temp_type[@intFromEnum(result_temp_index)] = Type.slice_const_u8; - result_temp_index.tracking(self).* = InstTracking.init(result); + result_temp_index.tracking(self).* = .init(result); first_temp.* = result_temp; } @@ -20520,13 +20622,43 @@ const Temp = struct { try self.asmOpOnly(.{ .@"rep _sb", .mov }); } + // i, m, r + fn add(lhs: *Temp, rhs: *Temp, self: *Self) !Temp { + const res_index = self.next_temp_index; + var res: Temp = .{ .index = res_index.toIndex() }; + try self.select(&.{ &res, lhs, rhs }, .{ ._, .add }, &.{ + .{ .ops = &.{ .{ .match = 1 }, .r, .i } }, + .{ .ops = &.{ .{ .match = 1 }, .m, .i } }, + .{ .ops = &.{ .{ .match = 1 }, .r, .m } }, + .{ .ops = &.{ .{ .match = 1 }, .m, .r } }, + .{ .ops = &.{ .{ .match = 1 }, .r, .r } }, + }); + self.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1); + self.temp_type[@intFromEnum(res_index)] = lhs.typeOf(self); + return res; + } + + fn mul(lhs: *Temp, rhs: *Temp, self: *Self) !Temp { + const res_index = self.next_temp_index; + var res: Temp = .{ .index = self.next_temp_index.toIndex() }; + try self.select(&.{ &res, lhs, rhs }, .{ .i_, .mul }, &.{ + .{ .ops = &.{ .r, .m, .i } }, + .{ .ops = &.{ .r, .r, .i } }, + .{ .ops = &.{ .{ .match = 1 }, .r, .m } }, + .{ .ops = &.{ .{ .match = 1 }, .r, .r } }, + }); + self.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1); + self.temp_type[@intFromEnum(res_index)] = lhs.typeOf(self); + return res; + } + fn moveTo(temp: Temp, inst: Air.Inst.Index, self: *Self) !void { if (self.liveness.isUnused(inst)) try temp.die(self) else switch (temp.unwrap(self)) { .ref => { const result = try self.allocRegOrMem(inst, true); try self.genCopy(self.typeOfIndex(inst), result, temp.tracking(self).short, .{}); tracking_log.debug("{} => {} (birth)", .{ inst, result }); - self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result)); + self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result)); }, .temp => |temp_index| { const temp_tracking = temp_index.tracking(self); @@ -20548,7 +20680,7 @@ const Temp = struct { _, fn toIndex(index: Index) Air.Inst.Index { - return Air.Inst.Index.fromTargetIndex(@intFromEnum(index)); + return .fromTargetIndex(@intFromEnum(index)); } fn fromIndex(index: Air.Inst.Index) Index { @@ -20629,7 +20761,19 @@ fn reuseTemp( fn tempAlloc(self: *Self, ty: Type) !Temp { const temp_index = self.next_temp_index; - temp_index.tracking(self).* = 
InstTracking.init(try self.allocRegOrMemAdvanced(ty, temp_index.toIndex(), true)); + temp_index.tracking(self).* = .init( + try self.allocRegOrMemAdvanced(ty, temp_index.toIndex(), true), + ); + self.temp_type[@intFromEnum(temp_index)] = ty; + self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); + return .{ .index = temp_index.toIndex() }; +} + +fn tempAllocReg(self: *Self, ty: Type, rc: RegisterManager.RegisterBitSet) !Temp { + const temp_index = self.next_temp_index; + temp_index.tracking(self).* = .init( + .{ .register = try self.register_manager.allocReg(temp_index.toIndex(), rc) }, + ); self.temp_type[@intFromEnum(temp_index)] = ty; self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); return .{ .index = temp_index.toIndex() }; @@ -20637,7 +20781,7 @@ fn tempAlloc(self: *Self, ty: Type) !Temp { fn tempFromValue(self: *Self, ty: Type, value: MCValue) !Temp { const temp_index = self.next_temp_index; - temp_index.tracking(self).* = InstTracking.init(value); + temp_index.tracking(self).* = .init(value); self.temp_type[@intFromEnum(temp_index)] = ty; try self.getValue(value, temp_index.toIndex()); self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); @@ -20657,8 +20801,8 @@ fn tempFromOperand( if (op_ref.toIndex()) |op_inst| return .{ .index = op_inst }; const val = op_ref.toInterned().?; const gop = try self.const_tracking.getOrPut(self.gpa, val); - if (!gop.found_existing) gop.value_ptr.* = InstTracking.init(init: { - const const_mcv = try self.genTypedValue(Value.fromInterned(val)); + if (!gop.found_existing) gop.value_ptr.* = .init(init: { + const const_mcv = try self.genTypedValue(.fromInterned(val)); switch (const_mcv) { .lea_tlv => |tlv_sym| switch (self.bin_file.tag) { .elf, .macho => { @@ -20667,7 +20811,7 @@ fn tempFromOperand( } else { try self.spillRegisters(&.{.rax}); } - const frame_index = try self.allocFrameIndex(FrameAlloc.init(.{ + const frame_index = try self.allocFrameIndex(.init(.{ .size = 8, .alignment = .@"8", })); @@ -20685,7 +20829,7 @@ fn tempFromOperand( else => break :init const_mcv, } }); - return self.tempFromValue(Type.fromInterned(ip.typeOf(val)), gop.value_ptr.short); + return self.tempFromValue(.fromInterned(ip.typeOf(val)), gop.value_ptr.short); } const temp_index = self.next_temp_index; @@ -20706,3 +20850,193 @@ inline fn tempsFromOperands(self: *Self, inst: Air.Inst.Index, op_refs: anytype) } return temps; } + +const Operand = union(enum) { + none, + reg: Register, + mem: Memory, + imm: Immediate, + inst: Mir.Inst.Index, +}; + +const Pattern = struct { + tag: Mir.Inst.FixedTag, + ops: []const Op, + commute: struct { u8, u8 } = .{ 0, 0 }, + features: []const std.Target.x86.Feature = &.{}, + + const Op = union(enum) { + /// match another operand + match: u8, + /// any general purpose register + gpr, + /// any 64-bit mmx register + mm, + /// any 128-bit sse register + xmm, + /// any 256-bit sse register + ymm, + /// any memory + mem, + /// specific immediate + imm: i8, + /// any immediate signed extended from 32 bits + simm32, + + fn matches(op: Op, is_mut: bool, temp: Temp, self: *Self) bool { + return switch (op) { + .match => unreachable, + .gpr => switch (temp.tracking(self).short) { + .register => |reg| reg.class() == .general_purpose, + .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and + reg_off.off == 0, + else => self.regClassForType(temp.typeOf(self)) == .general_purpose, + }, + .mm => switch (temp.tracking(self).short) { + .register => |reg| reg.class() == .mmx, + .register_offset => 
|reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0, + else => self.regClassForType(temp.typeOf(self)) == .mmx, + }, + .xmm => switch (temp.tracking(self).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => self.regClassForType(temp.typeOf(self)) == .sse, + }, + .ymm => switch (temp.tracking(self).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => self.regClassForType(temp.typeOf(self)) == .sse, + } and temp.typeOf(self).abiSize(self.pt.zcu) > 16, + .mem => (!is_mut or temp.isMut(self)) and temp.tracking(self).short.isMemory(), + .imm => |specific_imm| if (is_mut) unreachable else switch (temp.tracking(self).short) { + .immediate => |imm| @as(i64, @bitCast(imm)) == specific_imm, + else => false, + }, + .simm32 => if (is_mut) unreachable else switch (temp.tracking(self).short) { + .immediate => |imm| temp.typeOf(self).abiSize(self.pt.zcu) <= 4 or + std.math.cast(i32, @as(i64, @bitCast(imm))) != null, + else => false, + }, + }; + } + }; +}; +fn select(self: *Self, dst_temps: []Temp, src_temps: []const *Temp, patterns: []const Pattern) !void { + patterns: for (patterns) |pattern| { + for (pattern.features) |feature| if (!self.hasFeature(feature)) continue :patterns; + for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| if (!switch (src_op) { + .match => |match_index| pattern.ops[match_index], + else => src_op, + }.matches(src_op == .match, src_temp.*, self)) continue :patterns; + while (true) for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { + if (switch (switch (src_op) { + .match => |match_index| pattern.ops[match_index], + else => src_op, + }) { + .match => unreachable, + .gpr => try src_temp.toRegClass(.general_purpose, self), + .mm => try src_temp.toRegClass(.mmx, self), + .xmm, .ymm => try src_temp.toRegClass(.sse, self), + .mem, .imm, .simm32 => false, + }) break; + } else break; + var mir_ops: [4]Operand = @splat(.none); + var mir_ops_len = dst_temps.len; + for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { + const mir_op, const matched_src_op = op: switch (src_op) { + .match => |match_index| { + dst_temps[match_index] = src_temp.*; + break :op .{ &mir_ops[match_index], pattern.ops[match_index] }; + }, + else => { + defer mir_ops_len += 1; + break :op .{ &mir_ops[mir_ops_len], src_op }; + }, + }; + const src_mcv = src_temp.tracking(self).short; + mir_op.* = switch (matched_src_op) { + .match => unreachable, + .gpr => .{ .reg = registerAlias( + src_mcv.register, + @intCast(src_temp.typeOf(self).abiSize(self.pt.zcu)), + ) }, + .mm => .{ .reg = src_mcv.register }, + .xmm => .{ .reg = src_mcv.register.to128() }, + .ymm => .{ .reg = src_mcv.register.to256() }, + .mem => .{ .mem = try src_mcv.mem(self, self.memSize(src_temp.typeOf(self))) }, + .imm => |imm| .{ .imm = .s(imm) }, + .simm32 => switch (src_temp.typeOf(self).abiSize(self.pt.zcu)) { + else => unreachable, + 1 => .{ .imm = if (std.math.cast(i8, @as(i64, @bitCast(src_mcv.immediate)))) |small| + .s(small) + else + .u(@as(u8, @intCast(src_mcv.immediate))) }, + 2 => .{ .imm = if (std.math.cast(i16, @as(i64, @bitCast(src_mcv.immediate)))) |small| + .s(small) + else + .u(@as(u16, @intCast(src_mcv.immediate))) }, + 3...8 => .{ .imm = if (std.math.cast(i32, @as(i64, @bitCast(src_mcv.immediate)))) |small| + .s(small) + else + .u(@as(u32, @intCast(src_mcv.immediate))) }, + }, + }; + } + for ( + 
pattern.ops[0..dst_temps.len], + dst_temps, + mir_ops[0..dst_temps.len], + ) |dst_op, *dst_temp, *mir_op| { + if (mir_op.* != .none) continue; + const ty = src_temps[0].typeOf(self); + switch (dst_op) { + .match => |match_index| { + dst_temp.* = dst_temps[match_index]; + mir_op.* = mir_ops[match_index]; + }, + .gpr => { + dst_temp.* = try self.tempAllocReg(ty, abi.RegisterClass.gp); + mir_op.* = .{ .reg = registerAlias( + dst_temp.tracking(self).short.register, + @intCast(ty.abiSize(self.pt.zcu)), + ) }; + }, + .mm => @panic("TODO"), + .xmm => { + dst_temp.* = try self.tempAllocReg(ty, abi.RegisterClass.sse); + mir_op.* = .{ .reg = dst_temp.tracking(self).short.register.to128() }; + }, + .ymm => { + dst_temp.* = try self.tempAllocReg(ty, abi.RegisterClass.sse); + mir_op.* = .{ .reg = dst_temp.tracking(self).short.register.to256() }; + }, + .mem => @panic("TODO"), + .imm, .simm32 => unreachable, // unmodifiable destination + } + } + std.mem.swap(Operand, &mir_ops[pattern.commute[0]], &mir_ops[pattern.commute[1]]); + self.asmOps(pattern.tag, mir_ops) catch |err| switch (err) { + error.InvalidInstruction => { + const fixes = @tagName(pattern.tag[0]); + const fixes_replace = std.mem.indexOfScalar(u8, fixes, '_').?; + return self.fail( + "invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'", + .{ + fixes[0..fixes_replace], + @tagName(pattern.tag[1]), + fixes[fixes_replace + 1 ..], + @tagName(mir_ops[0]), + @tagName(mir_ops[1]), + @tagName(mir_ops[2]), + @tagName(mir_ops[3]), + }, + ); + }, + else => |e| return e, + }; + return; + } + log.err("failed to select:", .{}); + for (src_temps) |src_temp| log.err("{}", .{src_temp.tracking(self)}); + return self.fail("failed to select", .{}); +} diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index a7f308b7b4..d00d5b2e8a 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -22,7 +22,7 @@ pub const Inst = struct { /// ___ @"_", - /// Integer __ + /// Integer ___ i_, /// ___ Left diff --git a/src/register_manager.zig b/src/register_manager.zig index 48b12a59d2..9450c92d25 100644 --- a/src/register_manager.zig +++ b/src/register_manager.zig @@ -41,12 +41,12 @@ pub fn RegisterManager( registers: TrackedRegisters = undefined, /// Tracks which registers are free (in which case the /// corresponding bit is set to 1) - free_registers: RegisterBitSet = RegisterBitSet.initFull(), + free_registers: RegisterBitSet = .initFull(), /// Tracks all registers allocated in the course of this /// function - allocated_registers: RegisterBitSet = RegisterBitSet.initEmpty(), + allocated_registers: RegisterBitSet = .initEmpty(), /// Tracks registers which are locked from being allocated - locked_registers: RegisterBitSet = RegisterBitSet.initEmpty(), + locked_registers: RegisterBitSet = .initEmpty(), const Self = @This(); @@ -420,8 +420,8 @@ const MockRegister1 = enum(u2) { &MockRegister1.allocatable_registers, ); - const gp: RM.RegisterBitSet = blk: { - var set = RM.RegisterBitSet.initEmpty(); + const gp = blk: { + var set: RM.RegisterBitSet = .initEmpty(); set.setRangeValue(.{ .start = 0, .end = allocatable_registers.len, @@ -448,8 +448,8 @@ const MockRegister2 = enum(u2) { &MockRegister2.allocatable_registers, ); - const gp: RM.RegisterBitSet = blk: { - var set = RM.RegisterBitSet.initEmpty(); + const gp = blk: { + var set: RM.RegisterBitSet = .initEmpty(); set.setRangeValue(.{ .start = 0, .end = allocatable_registers.len, @@ -489,16 +489,16 @@ const MockRegister3 = enum(u3) { &MockRegister3.allocatable_registers, ); - const gp: RM.RegisterBitSet 
= blk: { - var set = RM.RegisterBitSet.initEmpty(); + const gp = blk: { + var set: RM.RegisterBitSet = .initEmpty(); set.setRangeValue(.{ .start = 0, .end = gp_regs.len, }, true); break :blk set; }; - const ext: RM.RegisterBitSet = blk: { - var set = RM.RegisterBitSet.initEmpty(); + const ext = blk: { + var set: RM.RegisterBitSet = .initEmpty(); set.setRangeValue(.{ .start = gp_regs.len, .end = allocatable_registers.len, From 7c713251cadf831fb2009ae8908b099f218cf42c Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 9 Nov 2024 06:09:42 -0500 Subject: [PATCH 03/25] x86_64: looped instructions --- lib/std/crypto/chacha20.zig | 4 +- lib/std/crypto/tls/Client.zig | 63 +- lib/std/http/protocol.zig | 1 - src/arch/x86_64/CodeGen.zig | 2064 ++++++++++++++++++++------------- src/arch/x86_64/bits.zig | 8 +- 5 files changed, 1274 insertions(+), 866 deletions(-) diff --git a/lib/std/crypto/chacha20.zig b/lib/std/crypto/chacha20.zig index 59d37db824..564df2933f 100644 --- a/lib/std/crypto/chacha20.zig +++ b/lib/std/crypto/chacha20.zig @@ -499,11 +499,9 @@ fn ChaChaNonVecImpl(comptime rounds_nb: usize) type { fn ChaChaImpl(comptime rounds_nb: usize) type { switch (builtin.cpu.arch) { .x86_64 => { - if (builtin.zig_backend == .stage2_x86_64) return ChaChaNonVecImpl(rounds_nb); - const has_avx2 = std.Target.x86.featureSetHas(builtin.cpu.features, .avx2); const has_avx512f = std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); - if (has_avx512f) return ChaChaVecImpl(rounds_nb, 4); + if (builtin.zig_backend != .stage2_x86_64 and has_avx512f) return ChaChaVecImpl(rounds_nb, 4); if (has_avx2) return ChaChaVecImpl(rounds_nb, 2); return ChaChaVecImpl(rounds_nb, 1); }, diff --git a/lib/std/crypto/tls/Client.zig b/lib/std/crypto/tls/Client.zig index 9aff6d82c9..09e5e0fdd5 100644 --- a/lib/std/crypto/tls/Client.zig +++ b/lib/std/crypto/tls/Client.zig @@ -356,14 +356,7 @@ pub fn init(stream: anytype, options: Options) InitError(@TypeOf(stream))!Client if (ciphertext.len > cleartext_fragment_buf.len) return error.TlsRecordOverflow; const cleartext = cleartext_fragment_buf[0..ciphertext.len]; const auth_tag = record_decoder.array(P.AEAD.tag_length).*; - const nonce = if (builtin.zig_backend == .stage2_x86_64 and - P.AEAD.nonce_length > comptime std.simd.suggestVectorLength(u8) orelse 1) - nonce: { - var nonce = pv.server_handshake_iv; - const operand = std.mem.readInt(u64, nonce[nonce.len - 8 ..], .big); - std.mem.writeInt(u64, nonce[nonce.len - 8 ..], operand ^ read_seq, .big); - break :nonce nonce; - } else nonce: { + const nonce = nonce: { const V = @Vector(P.AEAD.nonce_length, u8); const pad = [1]u8{0} ** (P.AEAD.nonce_length - 8); const operand: V = pad ++ @as([8]u8, @bitCast(big(read_seq))); @@ -400,14 +393,7 @@ pub fn init(stream: anytype, options: Options) InitError(@TypeOf(stream))!Client const record_iv = record_decoder.array(P.record_iv_length).*; const masked_read_seq = read_seq & comptime std.math.shl(u64, std.math.maxInt(u64), 8 * P.record_iv_length); - const nonce: [P.AEAD.nonce_length]u8 = if (builtin.zig_backend == .stage2_x86_64 and - P.AEAD.nonce_length > comptime std.simd.suggestVectorLength(u8) orelse 1) - nonce: { - var nonce = pv.app_cipher.server_write_IV ++ record_iv; - const operand = std.mem.readInt(u64, nonce[nonce.len - 8 ..], .big); - std.mem.writeInt(u64, nonce[nonce.len - 8 ..], operand ^ masked_read_seq, .big); - break :nonce nonce; - } else nonce: { + const nonce: [P.AEAD.nonce_length]u8 = nonce: { const V = @Vector(P.AEAD.nonce_length, u8); const pad = [1]u8{0} ** 
(P.AEAD.nonce_length - 8); const operand: V = pad ++ @as([8]u8, @bitCast(big(masked_read_seq))); @@ -750,14 +736,7 @@ pub fn init(stream: anytype, options: Options) InitError(@TypeOf(stream))!Client .app_cipher = std.mem.bytesToValue(P.Tls_1_2, &key_block), } }; const pv = &p.version.tls_1_2; - const nonce: [P.AEAD.nonce_length]u8 = if (builtin.zig_backend == .stage2_x86_64 and - P.AEAD.nonce_length > comptime std.simd.suggestVectorLength(u8) orelse 1) - nonce: { - var nonce = pv.app_cipher.client_write_IV ++ pv.app_cipher.client_salt; - const operand = std.mem.readInt(u64, nonce[nonce.len - 8 ..], .big); - std.mem.writeInt(u64, nonce[nonce.len - 8 ..], operand ^ write_seq, .big); - break :nonce nonce; - } else nonce: { + const nonce: [P.AEAD.nonce_length]u8 = nonce: { const V = @Vector(P.AEAD.nonce_length, u8); const pad = [1]u8{0} ** (P.AEAD.nonce_length - 8); const operand: V = pad ++ @as([8]u8, @bitCast(big(write_seq))); @@ -1043,14 +1022,7 @@ fn prepareCiphertextRecord( ciphertext_end += ciphertext_len; const auth_tag = ciphertext_buf[ciphertext_end..][0..P.AEAD.tag_length]; ciphertext_end += auth_tag.len; - const nonce = if (builtin.zig_backend == .stage2_x86_64 and - P.AEAD.nonce_length > comptime std.simd.suggestVectorLength(u8) orelse 1) - nonce: { - var nonce = pv.client_iv; - const operand = std.mem.readInt(u64, nonce[nonce.len - 8 ..], .big); - std.mem.writeInt(u64, nonce[nonce.len - 8 ..], operand ^ c.write_seq, .big); - break :nonce nonce; - } else nonce: { + const nonce = nonce: { const V = @Vector(P.AEAD.nonce_length, u8); const pad = [1]u8{0} ** (P.AEAD.nonce_length - 8); const operand: V = pad ++ std.mem.toBytes(big(c.write_seq)); @@ -1098,14 +1070,7 @@ fn prepareCiphertextRecord( const ad = std.mem.toBytes(big(c.write_seq)) ++ record_header[0 .. 
1 + 2] ++ int(u16, message_len); const record_iv = ciphertext_buf[ciphertext_end..][0..P.record_iv_length]; ciphertext_end += P.record_iv_length; - const nonce: [P.AEAD.nonce_length]u8 = if (builtin.zig_backend == .stage2_x86_64 and - P.AEAD.nonce_length > comptime std.simd.suggestVectorLength(u8) orelse 1) - nonce: { - var nonce = pv.client_write_IV ++ pv.client_salt; - const operand = std.mem.readInt(u64, nonce[nonce.len - 8 ..], .big); - std.mem.writeInt(u64, nonce[nonce.len - 8 ..], operand ^ c.write_seq, .big); - break :nonce nonce; - } else nonce: { + const nonce: [P.AEAD.nonce_length]u8 = nonce: { const V = @Vector(P.AEAD.nonce_length, u8); const pad = [1]u8{0} ** (P.AEAD.nonce_length - 8); const operand: V = pad ++ @as([8]u8, @bitCast(big(c.write_seq))); @@ -1374,14 +1339,7 @@ pub fn readvAdvanced(c: *Client, stream: anytype, iovecs: []const std.posix.iove const ciphertext = frag[in..][0..ciphertext_len]; in += ciphertext_len; const auth_tag = frag[in..][0..P.AEAD.tag_length].*; - const nonce = if (builtin.zig_backend == .stage2_x86_64 and - P.AEAD.nonce_length > comptime std.simd.suggestVectorLength(u8) orelse 1) - nonce: { - var nonce = pv.server_iv; - const operand = std.mem.readInt(u64, nonce[nonce.len - 8 ..], .big); - std.mem.writeInt(u64, nonce[nonce.len - 8 ..], operand ^ c.read_seq, .big); - break :nonce nonce; - } else nonce: { + const nonce = nonce: { const V = @Vector(P.AEAD.nonce_length, u8); const pad = [1]u8{0} ** (P.AEAD.nonce_length - 8); const operand: V = pad ++ std.mem.toBytes(big(c.read_seq)); @@ -1409,14 +1367,7 @@ pub fn readvAdvanced(c: *Client, stream: anytype, iovecs: []const std.posix.iove in += P.record_iv_length; const masked_read_seq = c.read_seq & comptime std.math.shl(u64, std.math.maxInt(u64), 8 * P.record_iv_length); - const nonce: [P.AEAD.nonce_length]u8 = if (builtin.zig_backend == .stage2_x86_64 and - P.AEAD.nonce_length > comptime std.simd.suggestVectorLength(u8) orelse 1) - nonce: { - var nonce = pv.server_write_IV ++ record_iv; - const operand = std.mem.readInt(u64, nonce[nonce.len - 8 ..], .big); - std.mem.writeInt(u64, nonce[nonce.len - 8 ..], operand ^ masked_read_seq, .big); - break :nonce nonce; - } else nonce: { + const nonce: [P.AEAD.nonce_length]u8 = nonce: { const V = @Vector(P.AEAD.nonce_length, u8); const pad = [1]u8{0} ** (P.AEAD.nonce_length - 8); const operand: V = pad ++ @as([8]u8, @bitCast(big(masked_read_seq))); diff --git a/lib/std/http/protocol.zig b/lib/std/http/protocol.zig index c56d3a24a1..fc00a68ec3 100644 --- a/lib/std/http/protocol.zig +++ b/lib/std/http/protocol.zig @@ -4,7 +4,6 @@ const testing = std.testing; const mem = std.mem; const assert = std.debug.assert; -const use_vectors = builtin.zig_backend != .stage2_x86_64; pub const State = enum { invalid, diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 80991c7aa6..9631173b9f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -104,7 +104,7 @@ const Owner = union(enum) { nav_index: InternPool.Nav.Index, lazy_sym: link.File.LazySymbol, - fn getSymbolIndex(owner: Owner, ctx: *Self) !u32 { + fn getSymbolIndex(owner: Owner, ctx: *CodeGen) !u32 { const pt = ctx.pt; switch (owner) { .nav_index => |nav_index| if (ctx.bin_file.cast(.elf)) |elf_file| { @@ -394,7 +394,7 @@ pub const MCValue = union(enum) { }; } - fn mem(mcv: MCValue, function: *Self, size: Memory.Size) !Memory { + fn mem(mcv: MCValue, function: *CodeGen, mod_rm: Memory.Mod.Rm) !Memory { return switch (mcv) { .none, .unreach, @@ -420,22 +420,28 @@ pub const 
MCValue = union(enum) { .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| .{ .base = .{ .reg = .ds }, .mod = .{ .rm = .{ - .size = size, - .disp = small_addr, + .size = mod_rm.size, + .index = mod_rm.index, + .scale = mod_rm.scale, + .disp = small_addr + mod_rm.disp, } }, } else .{ .base = .{ .reg = .ds }, .mod = .{ .off = addr } }, .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, .mod = .{ .rm = .{ - .size = size, - .disp = reg_off.off, + .size = mod_rm.size, + .index = mod_rm.index, + .scale = mod_rm.scale, + .disp = reg_off.off + mod_rm.disp, } }, }, .load_frame => |frame_addr| .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ - .size = size, - .disp = frame_addr.off, + .size = mod_rm.size, + .index = mod_rm.index, + .scale = mod_rm.scale, + .disp = frame_addr.off + mod_rm.disp, } }, }, .load_symbol => |sym_off| { @@ -443,12 +449,14 @@ pub const MCValue = union(enum) { return .{ .base = .{ .reloc = sym_off.sym_index }, .mod = .{ .rm = .{ - .size = size, - .disp = sym_off.off, + .size = mod_rm.size, + .index = mod_rm.index, + .scale = mod_rm.scale, + .disp = sym_off.off + mod_rm.disp, } }, }; }, - .air_ref => |ref| (try function.resolveInst(ref)).mem(function, size), + .air_ref => |ref| (try function.resolveInst(ref)).mem(function, mod_rm), }; } @@ -537,7 +545,7 @@ const InstTracking = struct { return self.short.getCondition(); } - fn spill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void { + fn spill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { if (std.meta.eql(self.long, self.short)) return; // Already spilled // Allocate or reuse frame index switch (self.long) { @@ -586,7 +594,7 @@ const InstTracking = struct { }; } - fn trackSpill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void { + fn trackSpill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { try function.freeValue(self.short); self.reuseFrame(); tracking_log.debug("{} => {} (spilled)", .{ inst, self.* }); @@ -633,7 +641,7 @@ const InstTracking = struct { fn materialize( self: *InstTracking, - function: *Self, + function: *CodeGen, inst: Air.Inst.Index, target: InstTracking, ) !void { @@ -643,7 +651,7 @@ const InstTracking = struct { fn materializeUnsafe( self: InstTracking, - function: *Self, + function: *CodeGen, inst: Air.Inst.Index, target: InstTracking, ) !void { @@ -675,7 +683,7 @@ const InstTracking = struct { } } - fn die(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void { + fn die(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { if (self.short == .dead) return; try function.freeValue(self.short); self.short = .{ .dead = function.scope_generation }; @@ -684,7 +692,7 @@ const InstTracking = struct { fn reuse( self: *InstTracking, - function: *Self, + function: *CodeGen, new_inst: ?Air.Inst.Index, old_inst: Air.Inst.Index, ) void { @@ -692,7 +700,7 @@ const InstTracking = struct { tracking_log.debug("{?} => {} (reuse {})", .{ new_inst, self.*, old_inst }); } - fn liveOut(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void { + fn liveOut(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) void { for (self.getRegs()) |reg| { if (function.register_manager.isRegFree(reg)) { tracking_log.debug("{} => {} (live-out)", .{ inst, self.* }); @@ -789,7 +797,7 @@ const BlockData = struct { } }; -const Self = @This(); +const CodeGen = @This(); pub fn generate( bin_file: *link.File, @@ -809,7 +817,7 @@ pub fn generate( const fn_type: Type = 
.fromInterned(func.ty); const mod = zcu.navFileScope(func.owner_nav).mod; - var function: Self = .{ + var function: CodeGen = .{ .gpa = gpa, .pt = pt, .air = air, @@ -962,7 +970,7 @@ pub fn generateLazy( const gpa = comp.gpa; // This function is for generating global code, so we use the root module. const mod = comp.root_mod; - var function: Self = .{ + var function: CodeGen = .{ .gpa = gpa, .pt = pt, .air = undefined, @@ -1050,7 +1058,7 @@ fn fmtNav(nav_index: InternPool.Nav.Index, ip: *const InternPool) std.fmt.Format } const FormatAirData = struct { - self: *Self, + self: *CodeGen, inst: Air.Inst.Index, }; fn formatAir( @@ -1066,12 +1074,12 @@ fn formatAir( data.self.liveness, ); } -fn fmtAir(self: *Self, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) { +fn fmtAir(self: *CodeGen, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) { return .{ .data = .{ .self = self, .inst = inst } }; } const FormatWipMirData = struct { - self: *Self, + self: *CodeGen, inst: Mir.Inst.Index, }; fn formatWipMir( @@ -1192,12 +1200,12 @@ fn formatWipMir( } } } -fn fmtWipMir(self: *Self, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) { +fn fmtWipMir(self: *CodeGen, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) { return .{ .data = .{ .self = self, .inst = inst } }; } const FormatTrackingData = struct { - self: *Self, + self: *CodeGen, }; fn formatTracking( data: FormatTrackingData, @@ -1208,26 +1216,26 @@ fn formatTracking( var it = data.self.inst_tracking.iterator(); while (it.next()) |entry| try writer.print("\n{} = {}", .{ entry.key_ptr.*, entry.value_ptr.* }); } -fn fmtTracking(self: *Self) std.fmt.Formatter(formatTracking) { +fn fmtTracking(self: *CodeGen) std.fmt.Formatter(formatTracking) { return .{ .data = .{ .self = self } }; } -fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { +fn addInst(self: *CodeGen, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { const gpa = self.gpa; try self.mir_instructions.ensureUnusedCapacity(gpa, 1); const result_index: Mir.Inst.Index = @intCast(self.mir_instructions.len); self.mir_instructions.appendAssumeCapacity(inst); - wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); + if (inst.ops != .pseudo_dead_none) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); return result_index; } -fn addExtra(self: *Self, extra: anytype) Allocator.Error!u32 { +fn addExtra(self: *CodeGen, extra: anytype) Allocator.Error!u32 { const fields = std.meta.fields(@TypeOf(extra)); try self.mir_extra.ensureUnusedCapacity(self.gpa, fields.len); return self.addExtraAssumeCapacity(extra); } -fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { +fn addExtraAssumeCapacity(self: *CodeGen, extra: anytype) u32 { const fields = std.meta.fields(@TypeOf(extra)); const result: u32 = @intCast(self.mir_extra.items.len); inline for (fields) |field| { @@ -1241,7 +1249,7 @@ fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { return result; } -fn asmOps(self: *Self, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void { +fn asmOps(self: *CodeGen, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void { return switch (ops[0]) { .none => self.asmOpOnly(tag), .reg => |reg0| switch (ops[1]) { @@ -1316,7 +1324,7 @@ fn asmOps(self: *Self, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void { } /// A `cc` of `.z_and_np` clobbers `reg2`! 
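// Illustrative aside, not part of the patch: after `ucomiss`/`ucomisd`, "ordered
// equal" is ZF set with PF clear (PF flags the unordered/NaN case), and no single
// `cmovcc` encodes that combined condition. The `.z_and_np` case handled below
// therefore has to expand into two conditional moves, one of which routes through
// `reg2`; that is the clobber the warning above is about. One possible expansion,
// written as scalar Zig with made-up names:
fn cmovZAndNp(reg1: u64, reg2: u64, zf: bool, pf: bool) struct { reg1: u64, reg2: u64 } {
    var r1 = reg1;
    var r2 = reg2;
    if (!zf) r2 = r1; // cmovnz reg2, reg1
    if (!pf) r1 = r2; // cmovnp reg1, reg2
    // r1 now holds reg2 exactly when ZF was set and PF was clear; r2 may be clobbered.
    return .{ .reg1 = r1, .reg2 = r2 };
}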
-fn asmCmovccRegisterRegister(self: *Self, cc: Condition, reg1: Register, reg2: Register) !void { +fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2: Register) !void { _ = try self.addInst(.{ .tag = switch (cc) { else => .cmov, @@ -1339,7 +1347,7 @@ fn asmCmovccRegisterRegister(self: *Self, cc: Condition, reg1: Register, reg2: R } /// A `cc` of `.z_and_np` is not supported by this encoding! -fn asmCmovccRegisterMemory(self: *Self, cc: Condition, reg: Register, m: Memory) !void { +fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memory) !void { _ = try self.addInst(.{ .tag = switch (cc) { else => .cmov, @@ -1363,7 +1371,7 @@ fn asmCmovccRegisterMemory(self: *Self, cc: Condition, reg: Register, m: Memory) }); } -fn asmSetccRegister(self: *Self, cc: Condition, reg: Register) !void { +fn asmSetccRegister(self: *CodeGen, cc: Condition, reg: Register) !void { _ = try self.addInst(.{ .tag = switch (cc) { else => .set, @@ -1387,7 +1395,7 @@ fn asmSetccRegister(self: *Self, cc: Condition, reg: Register) !void { }); } -fn asmSetccMemory(self: *Self, cc: Condition, m: Memory) !void { +fn asmSetccMemory(self: *CodeGen, cc: Condition, m: Memory) !void { const payload = try self.addExtra(Mir.Memory.encode(m)); _ = try self.addInst(.{ .tag = switch (cc) { @@ -1412,7 +1420,7 @@ fn asmSetccMemory(self: *Self, cc: Condition, m: Memory) !void { }); } -fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index { +fn asmJmpReloc(self: *CodeGen, target: Mir.Inst.Index) !Mir.Inst.Index { return self.addInst(.{ .tag = .jmp, .ops = .inst, @@ -1422,7 +1430,7 @@ fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index { }); } -fn asmJccReloc(self: *Self, cc: Condition, target: Mir.Inst.Index) !Mir.Inst.Index { +fn asmJccReloc(self: *CodeGen, cc: Condition, target: Mir.Inst.Index) !Mir.Inst.Index { return self.addInst(.{ .tag = switch (cc) { else => .j, @@ -1443,7 +1451,7 @@ fn asmJccReloc(self: *Self, cc: Condition, target: Mir.Inst.Index) !Mir.Inst.Ind }); } -fn asmReloc(self: *Self, tag: Mir.Inst.FixedTag, target: Mir.Inst.Index) !void { +fn asmReloc(self: *CodeGen, tag: Mir.Inst.FixedTag, target: Mir.Inst.Index) !void { _ = try self.addInst(.{ .tag = tag[1], .ops = .inst, @@ -1454,7 +1462,7 @@ fn asmReloc(self: *Self, tag: Mir.Inst.FixedTag, target: Mir.Inst.Index) !void { }); } -fn asmPlaceholder(self: *Self) !Mir.Inst.Index { +fn asmPlaceholder(self: *CodeGen) !Mir.Inst.Index { return self.addInst(.{ .tag = .pseudo, .ops = .pseudo_dead_none, @@ -1464,7 +1472,7 @@ fn asmPlaceholder(self: *Self) !Mir.Inst.Index { const MirTagAir = enum { dbg_local }; -fn asmAir(self: *Self, tag: MirTagAir, inst: Air.Inst.Index) !void { +fn asmAir(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index) !void { _ = try self.addInst(.{ .tag = .pseudo, .ops = switch (tag) { @@ -1474,7 +1482,7 @@ fn asmAir(self: *Self, tag: MirTagAir, inst: Air.Inst.Index) !void { }); } -fn asmAirImmediate(self: *Self, tag: MirTagAir, inst: Air.Inst.Index, imm: Immediate) !void { +fn asmAirImmediate(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index, imm: Immediate) !void { switch (imm) { .signed => |s| _ = try self.addInst(.{ .tag = .pseudo, @@ -1528,7 +1536,7 @@ fn asmAirImmediate(self: *Self, tag: MirTagAir, inst: Air.Inst.Index, imm: Immed } fn asmAirRegisterImmediate( - self: *Self, + self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index, reg: Register, @@ -1550,7 +1558,7 @@ fn asmAirRegisterImmediate( } fn asmAirFrameAddress( - self: *Self, + self: *CodeGen, tag: 
MirTagAir, inst: Air.Inst.Index, frame_addr: bits.FrameAddr, @@ -1567,7 +1575,7 @@ fn asmAirFrameAddress( }); } -fn asmAirMemory(self: *Self, tag: MirTagAir, inst: Air.Inst.Index, m: Memory) !void { +fn asmAirMemory(self: *CodeGen, tag: MirTagAir, inst: Air.Inst.Index, m: Memory) !void { _ = try self.addInst(.{ .tag = .pseudo, .ops = switch (tag) { @@ -1580,7 +1588,7 @@ fn asmAirMemory(self: *Self, tag: MirTagAir, inst: Air.Inst.Index, m: Memory) !v }); } -fn asmOpOnly(self: *Self, tag: Mir.Inst.FixedTag) !void { +fn asmOpOnly(self: *CodeGen, tag: Mir.Inst.FixedTag) !void { _ = try self.addInst(.{ .tag = tag[1], .ops = .none, @@ -1590,7 +1598,7 @@ fn asmOpOnly(self: *Self, tag: Mir.Inst.FixedTag) !void { }); } -fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void { +fn asmPseudo(self: *CodeGen, ops: Mir.Inst.Ops) !void { assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and std.mem.endsWith(u8, @tagName(ops), "_none")); _ = try self.addInst(.{ @@ -1600,7 +1608,7 @@ fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void { }); } -fn asmPseudoRegister(self: *Self, ops: Mir.Inst.Ops, reg: Register) !void { +fn asmPseudoRegister(self: *CodeGen, ops: Mir.Inst.Ops, reg: Register) !void { assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and std.mem.endsWith(u8, @tagName(ops), "_r")); _ = try self.addInst(.{ @@ -1610,7 +1618,7 @@ fn asmPseudoRegister(self: *Self, ops: Mir.Inst.Ops, reg: Register) !void { }); } -fn asmPseudoImmediate(self: *Self, ops: Mir.Inst.Ops, imm: Immediate) !void { +fn asmPseudoImmediate(self: *CodeGen, ops: Mir.Inst.Ops, imm: Immediate) !void { assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and std.mem.endsWith(u8, @tagName(ops), "_i_s")); _ = try self.addInst(.{ @@ -1620,7 +1628,7 @@ fn asmPseudoImmediate(self: *Self, ops: Mir.Inst.Ops, imm: Immediate) !void { }); } -fn asmPseudoRegisterRegister(self: *Self, ops: Mir.Inst.Ops, reg1: Register, reg2: Register) !void { +fn asmPseudoRegisterRegister(self: *CodeGen, ops: Mir.Inst.Ops, reg1: Register, reg2: Register) !void { assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and std.mem.endsWith(u8, @tagName(ops), "_rr")); _ = try self.addInst(.{ @@ -1630,7 +1638,7 @@ fn asmPseudoRegisterRegister(self: *Self, ops: Mir.Inst.Ops, reg1: Register, reg }); } -fn asmPseudoRegisterImmediate(self: *Self, ops: Mir.Inst.Ops, reg: Register, imm: Immediate) !void { +fn asmPseudoRegisterImmediate(self: *CodeGen, ops: Mir.Inst.Ops, reg: Register, imm: Immediate) !void { assert(std.mem.startsWith(u8, @tagName(ops), "pseudo_") and std.mem.endsWith(u8, @tagName(ops), "_ri_s")); _ = try self.addInst(.{ @@ -1640,7 +1648,7 @@ fn asmPseudoRegisterImmediate(self: *Self, ops: Mir.Inst.Ops, reg: Register, imm }); } -fn asmRegister(self: *Self, tag: Mir.Inst.FixedTag, reg: Register) !void { +fn asmRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register) !void { _ = try self.addInst(.{ .tag = tag[1], .ops = .r, @@ -1651,7 +1659,7 @@ fn asmRegister(self: *Self, tag: Mir.Inst.FixedTag, reg: Register) !void { }); } -fn asmImmediate(self: *Self, tag: Mir.Inst.FixedTag, imm: Immediate) !void { +fn asmImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, imm: Immediate) !void { _ = try self.addInst(.{ .tag = tag[1], .ops = switch (imm) { @@ -1676,7 +1684,7 @@ fn asmImmediate(self: *Self, tag: Mir.Inst.FixedTag, imm: Immediate) !void { }); } -fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void { +fn asmRegisterRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) 
!void { _ = try self.addInst(.{ .tag = tag[1], .ops = .rr, @@ -1688,7 +1696,7 @@ fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2 }); } -fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void { +fn asmRegisterImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void { const ops: Mir.Inst.Ops, const i: u32 = switch (imm) { .signed => |s| .{ .ri_s, @bitCast(s) }, .unsigned => |u| if (std.math.cast(u32, u)) |small| @@ -1709,7 +1717,7 @@ fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: } fn asmRegisterRegisterRegister( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, @@ -1728,7 +1736,7 @@ fn asmRegisterRegisterRegister( } fn asmRegisterRegisterRegisterRegister( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, @@ -1749,7 +1757,7 @@ fn asmRegisterRegisterRegisterRegister( } fn asmRegisterRegisterRegisterImmediate( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, @@ -1774,7 +1782,7 @@ fn asmRegisterRegisterRegisterImmediate( } fn asmRegisterRegisterImmediate( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, @@ -1801,7 +1809,7 @@ fn asmRegisterRegisterImmediate( } fn asmRegisterRegisterMemory( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, @@ -1820,7 +1828,7 @@ fn asmRegisterRegisterMemory( } fn asmRegisterRegisterMemoryRegister( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, @@ -1840,7 +1848,7 @@ fn asmRegisterRegisterMemoryRegister( }); } -fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void { +fn asmMemory(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory) !void { _ = try self.addInst(.{ .tag = tag[1], .ops = .m, @@ -1851,7 +1859,7 @@ fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void { }); } -fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void { +fn asmRegisterMemory(self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void { _ = try self.addInst(.{ .tag = tag[1], .ops = .rm, @@ -1864,7 +1872,7 @@ fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memo } fn asmRegisterMemoryRegister( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, m: Memory, @@ -1883,7 +1891,7 @@ fn asmRegisterMemoryRegister( } fn asmRegisterMemoryImmediate( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg: Register, m: Memory, @@ -1928,7 +1936,7 @@ fn asmRegisterMemoryImmediate( } fn asmRegisterRegisterMemoryImmediate( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, @@ -1948,7 +1956,7 @@ fn asmRegisterRegisterMemoryImmediate( }); } -fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void { +fn asmMemoryRegister(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void { _ = try self.addInst(.{ .tag = tag[1], .ops = .mr, @@ -1960,7 +1968,7 @@ fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Regist }); } -fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void { +fn asmMemoryImmediate(self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void { const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) { .signed => |s| @bitCast(s), .unsigned => 
|u| @intCast(u), @@ -1982,7 +1990,7 @@ fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immed } fn asmMemoryRegisterRegister( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory, reg1: Register, @@ -2001,7 +2009,7 @@ fn asmMemoryRegisterRegister( } fn asmMemoryRegisterImmediate( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, m: Memory, reg: Register, @@ -2019,7 +2027,7 @@ fn asmMemoryRegisterImmediate( }); } -fn gen(self: *Self) InnerError!void { +fn gen(self: *CodeGen) InnerError!void { const pt = self.pt; const zcu = pt.zcu; const fn_info = zcu.typeToFunc(self.fn_type).?; @@ -2229,7 +2237,7 @@ fn gen(self: *Self) InnerError!void { }); } -fn checkInvariantsAfterAirInst(self: *Self) void { +fn checkInvariantsAfterAirInst(self: *CodeGen) void { assert(!self.register_manager.lockedRegsExist()); if (std.debug.runtime_safety) { @@ -2245,13 +2253,13 @@ fn checkInvariantsAfterAirInst(self: *Self) void { } } -fn genBodyBlock(self: *Self, body: []const Air.Inst.Index) InnerError!void { +fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try self.asmPseudo(.pseudo_dbg_enter_block_none); try self.genBody(body); try self.asmPseudo(.pseudo_dbg_leave_block_none); } -fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { +fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const pt = self.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2461,15 +2469,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { try slot.moveTo(inst, self); }, .assembly => try self.airAsm(inst), - inline .bit_and, .bit_or, .xor => |tag| if (use_old or self.typeOfIndex(inst).abiSize(zcu) > @as( - u64, - if (!self.typeOfIndex(inst).isVector(zcu)) - 8 - else if (!self.hasFeature(.avx2)) - 16 - else - 32, - )) try self.airBinOp(inst, tag) else { + inline .bit_and, .bit_or, .xor => |tag| if (use_old) try self.airBinOp(inst, tag) else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try self.spillEflagsIfOccupied(); @@ -2481,30 +2481,109 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { }; var res: [1]Temp = undefined; try self.select(&res, &.{ &ops[0], &ops[1] }, &.{ - .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .ymm, .ymm, .mem }, .features = &.{.avx2} }, - .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .ymm, .mem, .ymm }, .commute = .{ 1, 2 }, .features = &.{.avx2} }, - .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .ymm, .ymm, .ymm }, .features = &.{.avx2} }, - .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .xmm, .xmm, .mem }, .features = &.{.avx} }, - .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .xmm, .mem, .xmm }, .commute = .{ 1, 2 }, .features = &.{.avx} }, - .{ .tag = .{ .vp_, mir_tag }, .ops = &.{ .xmm, .xmm, .xmm }, .features = &.{.avx} }, - .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .xmm, .{ .match = 0 }, .mem }, .features = &.{.sse2} }, - .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .xmm, .mem, .{ .match = 0 } }, .features = &.{.sse2} }, - .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .xmm, .{ .match = 0 }, .xmm }, .features = &.{.sse2} }, - .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .xmm, .xmm, .{ .match = 0 } }, .features = &.{.sse2} }, - .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .mm, .{ .match = 0 }, .mem }, .features = &.{.mmx} }, - .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .mm, .mem, .{ .match = 0 } }, .features = &.{.mmx} }, - .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .mm, .{ .match = 0 }, .mm }, .features = 
&.{.mmx} }, - .{ .tag = .{ .p_, mir_tag }, .ops = &.{ .mm, .mm, .{ .match = 0 } }, .features = &.{.mmx} }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .mem, .{ .match = 0 }, .simm32 } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .mem, .simm32, .{ .match = 0 } } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .mem, .{ .match = 0 }, .gpr } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .mem, .gpr, .{ .match = 0 } } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .{ .match = 0 }, .simm32 } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .simm32, .{ .match = 0 } } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .{ .match = 0 }, .mem } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .mem, .{ .match = 0 } } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .{ .match = 0 }, .gpr } }, - .{ .tag = .{ ._, mir_tag }, .ops = &.{ .gpr, .gpr, .{ .match = 0 } } }, + .{ + .required_features = &.{.avx2}, + .mir_tag = .{ .vp_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .ymm, .ymm, .mem } }, + .{ .ops = &.{ .ymm, .mem, .ymm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm, .ymm, .ymm } }, + }, + }, + .{ + .required_features = &.{.avx}, + .mir_tag = .{ .vp_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm, .xmm, .mem } }, + .{ .ops = &.{ .xmm, .mem, .xmm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm, .xmm, .xmm } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .mir_tag = .{ .p_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .xmm, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .xmm } }, + .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .mir_tag = .{ .p_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .mm, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mm } }, + .{ .ops = &.{ .mm, .mm, .{ .implicit = 0 } } }, + }, + }, + .{ + .mir_tag = .{ ._, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } }, + .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } }, + .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } }, + .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } }, + .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } }, + }, + }, + + .{ + .required_features = &.{.avx2}, + .loop = .bitwise, + .mir_tag = .{ .vp_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .ymm_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_limb, .ymm_limb, .mem_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .loop = .bitwise, + .mir_tag = .{ .vp_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_limb, .xmm_limb, .mem_limb } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .loop = .bitwise, + .mir_tag = .{ .p_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .loop = .bitwise, + .mir_tag = .{ .p_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .mm_limb, .{ 
.implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .mm_limb, .mem_limb, .{ .implicit = 0 } } }, + }, + }, + .{ + .loop = .bitwise, + .mir_tag = .{ ._, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } }, + .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, + }, + }, }); if (ops[0].index != res[0].index) try ops[0].die(self); if (ops[1].index != res[0].index) try ops[1].die(self); @@ -2620,12 +2699,12 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { while (try ops[0].toLea(self)) {} try self.asmMemoryImmediate( .{ ._, .cmp }, - try ops[0].tracking(self).short.deref().mem(self, if (!opt_repr_is_pl) + try ops[0].tracking(self).short.deref().mem(self, .{ .size = if (!opt_repr_is_pl) .byte else if (opt_child_ty.isSlice(zcu)) .qword else - .fromSize(opt_child_abi_size)), + .fromSize(opt_child_abi_size) }), .u(0), ); var is_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e }); @@ -2643,12 +2722,12 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { while (try ops[0].toLea(self)) {} try self.asmMemoryImmediate( .{ ._, .cmp }, - try ops[0].tracking(self).short.deref().mem(self, if (!opt_repr_is_pl) + try ops[0].tracking(self).short.deref().mem(self, .{ .size = if (!opt_repr_is_pl) .byte else if (opt_child_ty.isSlice(zcu)) .qword else - .fromSize(opt_child_abi_size)), + .fromSize(opt_child_abi_size) }), .u(0), ); var is_non_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne }); @@ -2666,7 +2745,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { while (try ops[0].toLea(self)) {} try self.asmMemoryImmediate( .{ ._, .cmp }, - try ops[0].tracking(self).short.deref().mem(self, self.memSize(eu_err_ty)), + try ops[0].tracking(self).short.deref().mem(self, .{ .size = self.memSize(eu_err_ty) }), .u(0), ); var is_err = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne }); @@ -2684,7 +2763,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { while (try ops[0].toLea(self)) {} try self.asmMemoryImmediate( .{ ._, .cmp }, - try ops[0].tracking(self).short.deref().mem(self, self.memSize(eu_err_ty)), + try ops[0].tracking(self).short.deref().mem(self, .{ .size = self.memSize(eu_err_ty) }), .u(0), ); var is_non_err = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e }); @@ -2950,7 +3029,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { verbose_tracking_log.debug("{}", .{self.fmtTracking()}); } -fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { +fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { const pt = self.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -3017,7 +3096,7 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { } } -fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) !void { +fn getValue(self: *CodeGen, value: MCValue, inst: ?Air.Inst.Index) !void { for (value.getRegs()) |reg| try self.register_manager.getReg(reg, inst); switch (value) { else => {}, @@ -3025,21 +3104,23 @@ fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) !void { } } -fn getValueIfFree(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void { +fn getValueIfFree(self: *CodeGen, value: MCValue, inst: ?Air.Inst.Index) 
void { for (value.getRegs()) |reg| if (self.register_manager.isRegFree(reg)) self.register_manager.getRegAssumeFree(reg, inst); } -fn freeValue(self: *Self, value: MCValue) !void { +fn freeReg(self: *CodeGen, reg: Register) !void { + self.register_manager.freeReg(reg); + if (reg.class() == .x87) try self.asmRegister(.{ .f_, .free }, reg); +} + +fn freeValue(self: *CodeGen, value: MCValue) !void { switch (value) { - .register => |reg| { - self.register_manager.freeReg(reg); - if (reg.class() == .x87) try self.asmRegister(.{ .f_, .free }, reg); - }, - .register_pair => |regs| for (regs) |reg| self.register_manager.freeReg(reg), - .register_offset => |reg_off| self.register_manager.freeReg(reg_off.reg), + .register => |reg| try self.freeReg(reg), + .register_pair => |regs| for (regs) |reg| try self.freeReg(reg), + .register_offset, .indirect => |reg_off| try self.freeReg(reg_off.reg), .register_overflow => |reg_ov| { - self.register_manager.freeReg(reg_ov.reg); + try self.freeReg(reg_ov.reg); self.eflags_inst = null; }, .eflags => self.eflags_inst = null, @@ -3047,16 +3128,16 @@ fn freeValue(self: *Self, value: MCValue) !void { } } -fn feed(self: *Self, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) !void { +fn feed(self: *CodeGen, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) !void { if (bt.feed()) if (operand.toIndex()) |inst| try self.processDeath(inst); } /// Asserts there is already capacity to insert into top branch inst_table. -fn processDeath(self: *Self, inst: Air.Inst.Index) !void { +fn processDeath(self: *CodeGen, inst: Air.Inst.Index) !void { try self.inst_tracking.getPtr(inst).?.die(self, inst); } -fn finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void { +fn finishAirResult(self: *CodeGen, inst: Air.Inst.Index, result: MCValue) void { if (self.liveness.isUnused(inst) and self.air.instructions.items(.tag)[@intFromEnum(inst)] != .arg) switch (result) { .none, .dead, .unreach => {}, else => unreachable, // Why didn't the result die? 
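// Illustrative aside, not part of the patch: the rewritten `bit_and`/`bit_or`/`xor`
// lowering above hands `select` a table of cases, each gated on CPU features and
// carrying a MIR tag plus a list of operand patterns (the real cases also carry
// `.loop` variants and implicit/explicit operand links, omitted here). The types
// below are made up; they only sketch the "first case whose features are present,
// first pattern whose operands fit" search such a table implies, not the backend's
// actual `select`.
const std = @import("std");

const Feature = enum { avx2, avx, sse2, mmx };
const OperandKind = enum { gpr, xmm, ymm, mm, mem, simm32 };

const Case = struct {
    required_features: []const Feature = &.{},
    mir_tag: []const u8,
    patterns: []const []const OperandKind,
};

fn selectCase(
    have: std.EnumSet(Feature),
    operands: []const OperandKind,
    table: []const Case,
) ?struct { case: *const Case, pattern: usize } {
    cases: for (table) |*case| {
        for (case.required_features) |feature| {
            if (!have.contains(feature)) continue :cases;
        }
        for (case.patterns, 0..) |pattern, i| {
            if (std.mem.eql(OperandKind, pattern, operands)) return .{ .case = case, .pattern = i };
        }
    }
    return null;
}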
@@ -3071,7 +3152,7 @@ fn finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void { } fn finishAir( - self: *Self, + self: *CodeGen, inst: Air.Inst.Index, result: MCValue, operands: [Liveness.bpi - 1]Air.Inst.Ref, @@ -3092,7 +3173,7 @@ const FrameLayout = struct { }; fn setFrameLoc( - self: *Self, + self: *CodeGen, frame_index: FrameIndex, base: Register, offset: *i32, @@ -3107,7 +3188,7 @@ fn setFrameLoc( offset.* += self.frame_allocs.items(.abi_size)[frame_i]; } -fn computeFrameLayout(self: *Self, cc: std.builtin.CallingConvention) !FrameLayout { +fn computeFrameLayout(self: *CodeGen, cc: std.builtin.CallingConvention) !FrameLayout { const frame_allocs_len = self.frame_allocs.len; try self.frame_locs.resize(self.gpa, frame_allocs_len); const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count); @@ -3173,16 +3254,16 @@ fn computeFrameLayout(self: *Self, cc: std.builtin.CallingConvention) !FrameLayo }; } -fn getFrameAddrAlignment(self: *Self, frame_addr: bits.FrameAddr) InternPool.Alignment { +fn getFrameAddrAlignment(self: *CodeGen, frame_addr: bits.FrameAddr) InternPool.Alignment { const alloc_align = self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_align; return @enumFromInt(@min(@intFromEnum(alloc_align), @ctz(frame_addr.off))); } -fn getFrameAddrSize(self: *Self, frame_addr: bits.FrameAddr) u32 { +fn getFrameAddrSize(self: *CodeGen, frame_addr: bits.FrameAddr) u32 { return self.frame_allocs.get(@intFromEnum(frame_addr.index)).abi_size - @as(u31, @intCast(frame_addr.off)); } -fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex { +fn allocFrameIndex(self: *CodeGen, alloc: FrameAlloc) !FrameIndex { const frame_allocs_slice = self.frame_allocs.slice(); const frame_size = frame_allocs_slice.items(.abi_size); const frame_align = frame_allocs_slice.items(.abi_align); @@ -3205,7 +3286,7 @@ fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex { } /// Use a pointer instruction as the basis for allocating stack memory. 
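// Illustrative aside, not part of the patch: `getFrameAddrAlignment` above works in
// log2 units (`InternPool.Alignment` stores an exponent), taking the minimum of the
// allocation's alignment and the alignment implied by the offset's trailing zero
// bits. The same computation in plain byte units, as a standalone sketch:
const std = @import("std");

fn frameAddrAlignment(base_align_bytes: u64, off: u31) u64 {
    // An offset of zero keeps the full base alignment, since @ctz(0) yields the
    // whole bit width; otherwise the lowest set bit of the offset caps it.
    const off_align = @as(u64, 1) << @ctz(off);
    return @min(base_align_bytes, off_align);
}

test frameAddrAlignment {
    try std.testing.expectEqual(@as(u64, 16), frameAddrAlignment(16, 0));
    try std.testing.expectEqual(@as(u64, 8), frameAddrAlignment(16, 8));
    try std.testing.expectEqual(@as(u64, 4), frameAddrAlignment(16, 12));
}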
-fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex { +fn allocMemPtr(self: *CodeGen, inst: Air.Inst.Index) !FrameIndex { const pt = self.pt; const zcu = pt.zcu; const ptr_ty = self.typeOfIndex(inst); @@ -3218,15 +3299,15 @@ fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex { })); } -fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { +fn allocRegOrMem(self: *CodeGen, inst: Air.Inst.Index, reg_ok: bool) !MCValue { return self.allocRegOrMemAdvanced(self.typeOfIndex(inst), inst, reg_ok); } -fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue { +fn allocTempRegOrMem(self: *CodeGen, elem_ty: Type, reg_ok: bool) !MCValue { return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok); } -fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { +fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { const pt = self.pt; const zcu = pt.zcu; const abi_size = std.math.cast(u32, ty.abiSize(zcu)) orelse { @@ -3260,7 +3341,7 @@ fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: b return .{ .load_frame = .{ .index = frame_index } }; } -fn regClassForType(self: *Self, ty: Type) Register.Class { +fn regClassForType(self: *CodeGen, ty: Type) Register.Class { const pt = self.pt; const zcu = pt.zcu; return switch (ty.zigTypeTag(zcu)) { @@ -3289,7 +3370,7 @@ fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet { }; } -fn regSetForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet { +fn regSetForType(self: *CodeGen, ty: Type) RegisterManager.RegisterBitSet { return regSetForRegClass(self.regClassForType(ty)); } @@ -3301,14 +3382,14 @@ const State = struct { scope_generation: u32, }; -fn initRetroactiveState(self: *Self) State { +fn initRetroactiveState(self: *CodeGen) State { var state: State = undefined; state.inst_tracking_len = @intCast(self.inst_tracking.count()); state.scope_generation = self.scope_generation; return state; } -fn saveRetroactiveState(self: *Self, state: *State) !void { +fn saveRetroactiveState(self: *CodeGen, state: *State) !void { try self.spillEflagsIfOccupied(); const free_registers = self.register_manager.free_registers; var it = free_registers.iterator(.{ .kind = .unset }); @@ -3320,13 +3401,13 @@ fn saveRetroactiveState(self: *Self, state: *State) !void { state.free_registers = free_registers; } -fn saveState(self: *Self) !State { +fn saveState(self: *CodeGen) !State { var state = self.initRetroactiveState(); try self.saveRetroactiveState(&state); return state; } -fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, comptime opts: struct { +fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, comptime opts: struct { emit_instructions: bool, update_tracking: bool, resurrect: bool, @@ -3414,7 +3495,7 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt } } -pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void { +pub fn spillInstruction(self: *CodeGen, reg: Register, inst: Air.Inst.Index) !void { const tracking = self.inst_tracking.getPtr(inst) orelse return; for (tracking.getRegs()) |tracked_reg| { if (tracked_reg.id() == reg.id()) break; @@ -3423,7 +3504,7 @@ pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void try tracking.trackSpill(self, inst); } -pub fn spillEflagsIfOccupied(self: *Self) !void { +pub fn spillEflagsIfOccupied(self: *CodeGen) !void { if 
(self.eflags_inst) |inst| { self.eflags_inst = null; const tracking = self.inst_tracking.getPtr(inst).?; @@ -3433,7 +3514,7 @@ pub fn spillEflagsIfOccupied(self: *Self) !void { } } -pub fn spillCallerPreservedRegs(self: *Self, cc: std.builtin.CallingConvention) !void { +pub fn spillCallerPreservedRegs(self: *CodeGen, cc: std.builtin.CallingConvention) !void { switch (cc) { .x86_64_sysv => try self.spillRegisters(abi.getCallerPreservedRegs(.{ .x86_64_sysv = .{} })), .x86_64_win => try self.spillRegisters(abi.getCallerPreservedRegs(.{ .x86_64_win = .{} })), @@ -3441,14 +3522,14 @@ pub fn spillCallerPreservedRegs(self: *Self, cc: std.builtin.CallingConvention) } } -pub fn spillRegisters(self: *Self, comptime registers: []const Register) !void { +pub fn spillRegisters(self: *CodeGen, comptime registers: []const Register) !void { inline for (registers) |reg| try self.register_manager.getKnownReg(reg, null); } /// Copies a value to a register without tracking the register. The register is not considered /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. -fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { +fn copyToTmpRegister(self: *CodeGen, ty: Type, mcv: MCValue) !Register { const reg = try self.register_manager.allocReg(null, self.regSetForType(ty)); try self.genSetReg(reg, ty, mcv, .{}); return reg; @@ -3459,7 +3540,7 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// This can have a side effect of spilling instructions to the stack to free up a register. /// WARNING make sure that the allocated register matches the returned MCValue from an instruction! fn copyToRegisterWithInstTracking( - self: *Self, + self: *CodeGen, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue, @@ -3469,12 +3550,12 @@ fn copyToRegisterWithInstTracking( return MCValue{ .register = reg }; } -fn airAlloc(self: *Self, inst: Air.Inst.Index) !void { +fn airAlloc(self: *CodeGen, inst: Air.Inst.Index) !void { const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } }; return self.finishAir(inst, result, .{ .none, .none, .none }); } -fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airRetPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const result: MCValue = switch (self.ret_mcv.long) { else => unreachable, .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } }, @@ -3490,7 +3571,7 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ .none, .none, .none }); } -fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { +fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const dst_ty = self.typeOfIndex(inst); const dst_bits = dst_ty.floatBits(self.target.*); @@ -3562,7 +3643,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, - try src_mcv.mem(self, .qword), + try src_mcv.mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegisterRegister( .{ .v_ss, .cvtsd2 }, dst_reg, @@ -3574,7 +3655,7 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ ._ss, .cvtsd2 }, dst_reg, - try src_mcv.mem(self, .qword), + try src_mcv.mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._ss, .cvtsd2 }, dst_reg, @@ -3589,7 +3670,7 @@ fn airFptrunc(self: *Self, inst: 
Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airFpext(self: *Self, inst: Air.Inst.Index) !void { +fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -3674,7 +3755,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { .{ .v_sd, .cvtss2 }, dst_alias, dst_alias, - try src_mcv.mem(self, self.memSize(src_ty)), + try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), ) else try self.asmRegisterRegisterRegister( .{ .v_sd, .cvtss2 }, dst_alias, @@ -3687,7 +3768,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { 2...4 => if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_pd, .cvtps2 }, dst_alias, - try src_mcv.mem(self, self.memSize(src_ty)), + try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), ) else try self.asmRegisterRegister( .{ .v_pd, .cvtps2 }, dst_alias, @@ -3704,7 +3785,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { else => break :result null, }, dst_alias, - try src_mcv.mem(self, self.memSize(src_ty)), + try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), ) else try self.asmRegisterRegister( switch (vec_len) { 1 => .{ ._sd, .cvtss2 }, @@ -3725,7 +3806,7 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { +fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -3841,7 +3922,7 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_alias, - try src_mcv.mem(self, self.memSize(src_ty)), + try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), ) else try self.asmRegisterRegister( mir_tag, dst_alias, @@ -3984,7 +4065,7 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { +fn airTrunc(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -4080,7 +4161,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { .{ .vp_, .@"and" }, dst_alias, dst_alias, - try splat_addr_mcv.deref().mem(self, .fromSize(splat_abi_size)), + try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }), ); if (src_abi_size > 16) { const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); @@ -4104,7 +4185,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .{ .p_, .@"and" }, dst_alias, - try splat_addr_mcv.deref().mem(self, .fromSize(splat_abi_size)), + try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }), ); try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias); } @@ -4130,7 +4211,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airIntFromBool(self: *Self, inst: Air.Inst.Index) !void { +fn airIntFromBool(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const ty = self.typeOfIndex(inst); @@ -4143,7 +4224,7 @@ fn airIntFromBool(self: *Self, inst: Air.Inst.Index) 
!void { return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } -fn airSlice(self: *Self, inst: Air.Inst.Index) !void { +fn airSlice(self: *CodeGen, inst: Air.Inst.Index) !void { const zcu = self.pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; @@ -4167,13 +4248,13 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airUnOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { +fn airUnOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const dst_mcv = try self.genUnOp(inst, tag, ty_op.operand); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { +fn airBinOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -4208,14 +4289,14 @@ fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { +fn airPtrArithmetic(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn activeIntBits(self: *Self, dst_air: Air.Inst.Ref) u16 { +fn activeIntBits(self: *CodeGen, dst_air: Air.Inst.Ref) u16 { const pt = self.pt; const zcu = pt.zcu; const air_tag = self.air.instructions.items(.tag); @@ -4250,7 +4331,7 @@ fn activeIntBits(self: *Self, dst_air: Air.Inst.Ref) u16 { return dst_info.bits; } -fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { +fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -4314,7 +4395,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, - try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._, .mov }, tmp_reg, @@ -4338,7 +4419,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .xor }, tmp_reg, - try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._, .xor }, tmp_reg, @@ -4441,12 +4522,12 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .{ ._, .add }, tmp_regs[0], - try mat_rhs_mcv.mem(self, .qword), + try mat_rhs_mcv.mem(self, .{ .size = .qword }), ); try self.asmRegisterMemory( .{ ._, .adc }, tmp_regs[1], - try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ); } else for ( [_]Mir.Inst.Tag{ .add, .adc }, @@ -4476,7 
+4557,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { +fn airAddSat(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -4577,7 +4658,7 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { +fn airSubSat(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -4671,7 +4752,7 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { +fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -4715,7 +4796,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, - try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._, .mov }, tmp_reg, @@ -4739,7 +4820,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .xor }, tmp_reg, - try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._, .xor }, tmp_reg, @@ -4748,7 +4829,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63)); try self.asmRegister(.{ ._, .not }, tmp_reg); - try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .dword), .s(0)); + try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .{ .size = .dword }), .s(0)); try self.freeValue(overflow); try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[0], tmp_reg); try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, .u(63)); @@ -4818,7 +4899,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { +fn airAddSubWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -4883,7 +4964,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { +fn airShlWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -4961,7 +5042,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } fn genSetFrameTruncatedOverflowCompare( - self: *Self, + self: *CodeGen, tuple_ty: Type, frame_index: FrameIndex, src_mcv: MCValue, @@ -5039,7 +5120,7 @@ fn genSetFrameTruncatedOverflowCompare( ); } -fn 
airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { +fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -5214,7 +5295,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { ); try self.asmMemoryImmediate( .{ ._, .cmp }, - try overflow.mem(self, self.memSize(Type.c_int)), + try overflow.mem(self, .{ .size = self.memSize(Type.c_int) }), .s(0), ); try self.genSetMem( @@ -5270,7 +5351,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, .rax, - try mat_lhs_mcv.mem(self, .qword), + try mat_lhs_mcv.mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._, .mov }, .rax, @@ -5279,7 +5360,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_regs[0], - try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._, .mov }, tmp_regs[0], @@ -5290,7 +5371,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax); try self.asmSetccRegister(.o, tmp_regs[2].to8()); if (mat_rhs_mcv.isMemory()) - try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .qword)) + try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword })) else try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]); @@ -5299,7 +5380,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_regs[0], - try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._, .mov }, tmp_regs[0], @@ -5316,7 +5397,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ .i_, .mul }, tmp_regs[0], - try mat_rhs_mcv.mem(self, .qword), + try mat_rhs_mcv.mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ .i_, .mul }, tmp_regs[0], @@ -5416,7 +5497,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { /// Generates signed or unsigned integer multiplication/division. /// Clobbers .rax and .rdx registers. /// Quotient is saved in .rax and remainder in .rdx. 
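// Illustrative aside, not part of the patch: `genIntMulDivOpMir`, documented just
// above, produces a truncating quotient/remainder pair (rax/rdx), and
// `genInlineIntDivFloor` further below turns that into a flooring division. The
// correction it needs is the usual identity: subtract one from the truncated
// quotient when the remainder is nonzero and the operands have opposite signs.
// A scalar sketch of that identity, independent of the exact instruction sequence
// the backend emits:
const std = @import("std");

fn divFloor(a: i64, b: i64) i64 {
    const q = @divTrunc(a, b);
    const r = @rem(a, b);
    return if (r != 0 and (a < 0) != (b < 0)) q - 1 else q;
}

test divFloor {
    try std.testing.expectEqual(@as(i64, 2), divFloor(7, 3));
    try std.testing.expectEqual(@as(i64, -3), divFloor(-7, 3));
    try std.testing.expectEqual(@as(i64, -3), divFloor(7, -3));
}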
-fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void { +fn genIntMulDivOpMir(self: *CodeGen, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void { const pt = self.pt; const abi_size: u32 = @intCast(ty.abiSize(pt.zcu)); const bit_size: u32 = @intCast(self.regBitSize(ty)); @@ -5455,7 +5536,10 @@ fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue }; switch (mat_rhs) { .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)), - .memory, .indirect, .load_frame => try self.asmMemory(tag, try mat_rhs.mem(self, .fromSize(abi_size))), + .memory, .indirect, .load_frame => try self.asmMemory( + tag, + try mat_rhs.mem(self, .{ .size = .fromSize(abi_size) }), + ), else => unreachable, } if (tag[1] == .div and bit_size == 8) try self.asmRegisterRegister(.{ ._, .mov }, .dl, .ah); @@ -5463,7 +5547,7 @@ fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue /// Always returns a register. /// Clobbers .rax and .rdx registers. -fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue { +fn genInlineIntDivFloor(self: *CodeGen, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue { const pt = self.pt; const zcu = pt.zcu; const abi_size: u32 = @intCast(ty.abiSize(zcu)); @@ -5516,7 +5600,7 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa return MCValue{ .register = divisor }; } -fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void { +fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -5784,14 +5868,14 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airShlSat(self: *Self, inst: Air.Inst.Index) !void { +fn airShlSat(self: *CodeGen, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; _ = bin_op; return self.fail("TODO implement shl_sat for {}", .{self.target.cpu.arch}); //return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { +fn airOptionalPayload(self: *CodeGen, inst: Air.Inst.Index) !void { const zcu = self.pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const result: MCValue = result: { @@ -5824,7 +5908,7 @@ fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airOptionalPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airOptionalPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const dst_ty = self.typeOfIndex(inst); @@ -5837,7 +5921,7 @@ fn airOptionalPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { +fn airOptionalPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -5878,7 +5962,7 @@ fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airUnwrapErrUnionErr(self: *Self, 
inst: Air.Inst.Index) !void { +fn airUnwrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -5923,7 +6007,7 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airUnwrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { +fn airUnwrapErrUnionPayload(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const operand_ty = self.typeOf(ty_op.operand); const operand = try self.resolveInst(ty_op.operand); @@ -5932,7 +6016,7 @@ fn airUnwrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { } // *(E!T) -> E -fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airUnwrapErrUnionErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -5972,7 +6056,7 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { } // *(E!T) -> *T -fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airUnwrapErrUnionPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const operand_ty = self.typeOf(ty_op.operand); const operand = try self.resolveInst(ty_op.operand); @@ -5980,7 +6064,7 @@ fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { +fn airErrUnionPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -6037,7 +6121,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { } fn genUnwrapErrUnionPayloadMir( - self: *Self, + self: *CodeGen, maybe_inst: ?Air.Inst.Index, err_union_ty: Type, err_union: MCValue, @@ -6087,7 +6171,7 @@ fn genUnwrapErrUnionPayloadMir( } fn genUnwrapErrUnionPayloadPtrMir( - self: *Self, + self: *CodeGen, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue, @@ -6110,23 +6194,23 @@ fn genUnwrapErrUnionPayloadPtrMir( return result; } -fn airErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void { +fn airErrReturnTrace(self: *CodeGen, inst: Air.Inst.Index) !void { _ = inst; return self.fail("TODO implement airErrReturnTrace for {}", .{self.target.cpu.arch}); //return self.finishAir(inst, result, .{ .none, .none, .none }); } -fn airSetErrReturnTrace(self: *Self, inst: Air.Inst.Index) !void { +fn airSetErrReturnTrace(self: *CodeGen, inst: Air.Inst.Index) !void { _ = inst; return self.fail("TODO implement airSetErrReturnTrace for {}", .{self.target.cpu.arch}); } -fn airSaveErrReturnTraceIndex(self: *Self, inst: Air.Inst.Index) !void { +fn airSaveErrReturnTraceIndex(self: *CodeGen, inst: Air.Inst.Index) !void { _ = inst; return self.fail("TODO implement airSaveErrReturnTraceIndex for {}", .{self.target.cpu.arch}); } -fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { +fn airWrapOptional(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -6181,7 +6265,7 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void 
{ } /// T to E!T -fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { +fn airWrapErrUnionPayload(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -6205,7 +6289,7 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { } /// E to E!T -fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { +fn airWrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -6228,7 +6312,7 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { +fn airSlicePtr(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const result = result: { const src_mcv = try self.resolveInst(ty_op.operand); @@ -6251,7 +6335,7 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { +fn airSliceLen(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const result = result: { const src_mcv = try self.resolveInst(ty_op.operand); @@ -6279,7 +6363,7 @@ fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airPtrSliceLenPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -6314,7 +6398,7 @@ fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airPtrSlicePtrPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const dst_ty = self.typeOfIndex(inst); @@ -6327,7 +6411,7 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register { +fn elemOffset(self: *CodeGen, index_ty: Type, index: MCValue, elem_size: u64) !Register { const reg: Register = blk: { switch (index) { .immediate => |imm| { @@ -6347,7 +6431,7 @@ fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Regi return reg; } -fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { +fn genSliceElemPtr(self: *CodeGen, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { const pt = self.pt; const zcu = pt.zcu; const slice_ty = self.typeOf(lhs); @@ -6384,7 +6468,7 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { return MCValue{ .register = addr_reg.to64() }; } -fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { +fn airSliceElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -6403,14 +6487,14 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { return 
self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airSliceElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; const dst_mcv = try self.genSliceElemPtr(extra.lhs, extra.rhs); return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); } -fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { +fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -6463,7 +6547,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { }, .load_frame => try self.asmMemoryRegister( .{ ._, .bt }, - try array_mcv.mem(self, .qword), + try array_mcv.mem(self, .{ .size = .qword }), index_reg.to64(), ), .memory, .load_symbol, .load_direct, .load_got, .load_tlv => try self.asmMemoryRegister( @@ -6540,7 +6624,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { +fn airPtrElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -6591,7 +6675,7 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airPtrElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -6637,7 +6721,7 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); } -fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { +fn airSetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -6682,7 +6766,7 @@ fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { +fn airGetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void { const zcu = self.pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -6739,7 +6823,7 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn airClz(self: *Self, inst: Air.Inst.Index) !void { +fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -6936,7 +7020,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airCtz(self: *Self, inst: Air.Inst.Index) !void { +fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -7098,7 +7182,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn 
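A large share of this diff is the mechanical migration from `.mem(self, .qword)` to `.mem(self, .{ .size = .qword })`: the size argument moves into an options struct. The stand-alone sketch below uses stand-in types (not the compiler's real MCValue/Memory) to show why that call-site shape is attractive: further options can later be added with defaults without another mass edit.

const std = @import("std");

const Size = enum { byte, word, dword, qword, xword };

// Stand-in for the options struct the patch threads through `mem`;
// only `size` exists in this sketch.
const MemOptions = struct {
    size: Size,
};

const FakeMcv = struct {
    off: i32,

    // old shape: fn mem(self, size: Size); new shape takes the options struct.
    fn mem(self: FakeMcv, opts: MemOptions) struct { off: i32, size: Size } {
        return .{ .off = self.off, .size = opts.size };
    }
};

test "options-struct call site" {
    const mcv: FakeMcv = .{ .off = 8 };
    const m = mcv.mem(.{ .size = .qword }); // mirrors `.mem(self, .{ .size = .qword })`
    try std.testing.expectEqual(Size.qword, m.size);
}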
airPopCount(self: *Self, inst: Air.Inst.Index) !void { +fn airPopCount(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -7157,7 +7241,7 @@ fn airPopCount(self: *Self, inst: Air.Inst.Index) !void { } fn genPopCount( - self: *Self, + self: *CodeGen, dst_reg: Register, src_ty: Type, src_mcv: MCValue, @@ -7249,7 +7333,7 @@ fn genPopCount( } fn genByteSwap( - self: *Self, + self: *CodeGen, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, @@ -7308,7 +7392,7 @@ fn genByteSwap( try self.asmRegisterMemory( .{ ._, if (has_movbe) .movbe else .mov }, dst_reg.to64(), - try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .qword), + try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }), ); if (!has_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); } else { @@ -7414,7 +7498,7 @@ fn genByteSwap( return dst_mcv; } -fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void { +fn airByteSwap(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -7437,7 +7521,7 @@ fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { +fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -7560,7 +7644,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) !void { +fn floatSign(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) !void { const pt = self.pt; const zcu = pt.zcu; const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)]; @@ -7623,7 +7707,7 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) else => unreachable, }); const sign_mem: Memory = if (sign_mcv.isMemory()) - try sign_mcv.mem(self, .fromSize(abi_size)) + try sign_mcv.mem(self, .{ .size = .fromSize(abi_size) }) else .{ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) }, @@ -7688,7 +7772,7 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) return self.finishAir(inst, result, .{ operand, .none, .none }); } -fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { +fn airFloatSign(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const ty = self.typeOf(un_op); return self.floatSign(inst, un_op, ty); @@ -7713,7 +7797,7 @@ const RoundMode = packed struct(u5) { } = .normal, }; -fn airRound(self: *Self, inst: Air.Inst.Index, mode: RoundMode) !void { +fn airRound(self: *CodeGen, inst: Air.Inst.Index, mode: RoundMode) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const ty = self.typeOf(un_op); @@ -7737,7 +7821,7 @@ fn airRound(self: *Self, inst: Air.Inst.Index, mode: RoundMode) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn getRoundTag(self: *Self, ty: Type) ?Mir.Inst.FixedTag { +fn getRoundTag(self: *CodeGen, ty: Type) ?Mir.Inst.FixedTag { const pt = self.pt; const zcu = pt.zcu; return if 
(self.hasFeature(.sse4_1)) switch (ty.zigTypeTag(zcu)) { @@ -7770,7 +7854,7 @@ fn getRoundTag(self: *Self, ty: Type) ?Mir.Inst.FixedTag { } else null; } -fn genRoundLibcall(self: *Self, ty: Type, src_mcv: MCValue, mode: RoundMode) !MCValue { +fn genRoundLibcall(self: *CodeGen, ty: Type, src_mcv: MCValue, mode: RoundMode) !MCValue { const pt = self.pt; const zcu = pt.zcu; if (self.getRoundTag(ty)) |_| return .none; @@ -7795,7 +7879,7 @@ fn genRoundLibcall(self: *Self, ty: Type, src_mcv: MCValue, mode: RoundMode) !MC } }, &.{ty}, &.{src_mcv}); } -fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: RoundMode) !void { +fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: RoundMode) !void { const pt = self.pt; const mir_tag = self.getRoundTag(ty) orelse { const result = try self.genRoundLibcall(ty, src_mcv, mode); @@ -7808,7 +7892,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro mir_tag, dst_alias, dst_alias, - try src_mcv.mem(self, .fromSize(abi_size)), + try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), .u(@as(u5, @bitCast(mode))), ) else try self.asmRegisterRegisterRegisterImmediate( mir_tag, @@ -7823,7 +7907,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( mir_tag, dst_alias, - try src_mcv.mem(self, .fromSize(abi_size)), + try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), .u(@as(u5, @bitCast(mode))), ) else try self.asmRegisterRegisterImmediate( mir_tag, @@ -7837,7 +7921,7 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: Ro } } -fn airAbs(self: *Self, inst: Air.Inst.Index) !void { +fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -7865,7 +7949,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( .l, registerAlias(dst_mcv.register, cmov_abi_size), - try src_mcv.mem(self, .fromSize(cmov_abi_size)), + try src_mcv.mem(self, .{ .size = .fromSize(cmov_abi_size) }), ), else => { const val_reg = try self.copyToTmpRegister(ty, src_mcv); @@ -7931,7 +8015,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { try self.asmMemoryImmediate( .{ ._, .cmp }, - try dst_mcv.address().offset((limb_len - 1) * 8).deref().mem(self, .qword), + try dst_mcv.address().offset((limb_len - 1) * 8).deref().mem(self, .{ .size = .qword }), .u(0), ); const positive = try self.asmJccReloc(.ns, undefined); @@ -8024,7 +8108,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_alias, - try src_mcv.mem(self, self.memSize(ty)), + try src_mcv.mem(self, .{ .size = self.memSize(ty) }), ) else try self.asmRegisterRegister( mir_tag, dst_alias, @@ -8038,7 +8122,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { +fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; @@ -8131,9 +8215,9 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, wide_reg, - try src_mcv.mem(self, .fromSize( + try src_mcv.mem(self, 
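genRound above encodes a RoundMode as the rounding-control immediate by bit-casting the packed struct to u5 (`.u(@as(u5, @bitCast(mode)))`). Below is a compileable miniature of that trick; the field layout is illustrative only and does not claim to match the backend's exact bit assignment.

const std = @import("std");

const RoundModeSketch = packed struct(u5) {
    // Low bits select the direction; remaining bits are placeholders.
    direction: enum(u2) { nearest = 0, down = 1, up = 2, toward_zero = 3 } = .nearest,
    use_mxcsr: bool = false,
    suppress_inexact: bool = false,
    reserved: u1 = 0,
};

test "packed round mode becomes the instruction immediate" {
    const imm: u5 = @bitCast(RoundModeSketch{ .direction = .toward_zero });
    try std.testing.expectEqual(@as(u5, 3), imm);
}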
.{ .size = .fromSize( @intCast(@divExact(wide_reg.bitSize(), 16)), - )), + ) }), ) else try self.asmRegisterRegister( .{ .v_ps, .cvtph2 }, wide_reg, @@ -8177,7 +8261,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { mir_tag, dst_reg, dst_reg, - try src_mcv.mem(self, .fromSize(abi_size)), + try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_reg, @@ -8190,7 +8274,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { else => if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_reg, - try src_mcv.mem(self, .fromSize(abi_size)), + try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), ) else try self.asmRegisterRegister( mir_tag, dst_reg, @@ -8205,7 +8289,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airUnaryMath(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { +fn airUnaryMath(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const ty = self.typeOf(un_op); var callee_buf: ["__round?".len]u8 = undefined; @@ -8234,7 +8318,7 @@ fn airUnaryMath(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { } fn reuseOperand( - self: *Self, + self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, op_index: Liveness.OperandInt, @@ -8244,7 +8328,7 @@ fn reuseOperand( } fn reuseOperandAdvanced( - self: *Self, + self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, op_index: Liveness.OperandInt, @@ -8282,7 +8366,7 @@ fn reuseOperandAdvanced( return true; } -fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { +fn packedLoad(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { const pt = self.pt; const zcu = pt.zcu; @@ -8390,7 +8474,7 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg }, .{}); } -fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { +fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { const pt = self.pt; const zcu = pt.zcu; const dst_ty = ptr_ty.childType(zcu); @@ -8433,7 +8517,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerErro } } -fn airLoad(self: *Self, inst: Air.Inst.Index) !void { +fn airLoad(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -8492,7 +8576,7 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void { +fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void { const pt = self.pt; const zcu = pt.zcu; const ptr_info = ptr_ty.ptrInfo(zcu); @@ -8593,7 +8677,7 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In } fn store( - self: *Self, + self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue, @@ -8641,7 +8725,7 @@ fn store( } } -fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { +fn airStore(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = 
self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -8667,20 +8751,20 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airStructFieldPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airStructFieldPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.StructField, ty_pl.payload).data; const result = try self.fieldPtr(inst, extra.struct_operand, extra.field_index); return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none }); } -fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, field_index: u8) !void { +fn airStructFieldPtrIndex(self: *CodeGen, inst: Air.Inst.Index, field_index: u8) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const result = try self.fieldPtr(inst, ty_op.operand, field_index); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, field_index: u32) !MCValue { +fn fieldPtr(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, field_index: u32) !MCValue { const ptr_field_ty = self.typeOfIndex(inst); const src_mcv = try self.resolveInst(operand); @@ -8692,7 +8776,7 @@ fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, field_inde return dst_mcv.offset(self.fieldOffset(self.typeOf(operand), ptr_field_ty, field_index)); } -fn fieldOffset(self: *Self, ptr_agg_ty: Type, ptr_field_ty: Type, field_index: u32) i32 { +fn fieldOffset(self: *CodeGen, ptr_agg_ty: Type, ptr_field_ty: Type, field_index: u32) i32 { const pt = self.pt; const zcu = pt.zcu; const agg_ty = ptr_agg_ty.childType(zcu); @@ -8704,7 +8788,7 @@ fn fieldOffset(self: *Self, ptr_agg_ty: Type, ptr_field_ty: Type, field_index: u }; } -fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { +fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -8993,7 +9077,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none }); } -fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airFieldParentPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; @@ -9008,7 +9092,7 @@ fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ extra.field_ptr, .none, .none }); } -fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue { +fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue { const pt = self.pt; const zcu = pt.zcu; const src_ty = self.typeOf(src_air); @@ -9097,7 +9181,7 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: return dst_mcv; } -fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void { +fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void { const pt = self.pt; const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu)); if (abi_size > 8) return self.fail("TODO implement {} for {}", .{ mir_tag, 
dst_ty.fmt(pt) }); @@ -9133,14 +9217,14 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MC }, .indirect, .load_frame => try self.asmMemory( mir_tag, - try dst_mcv.mem(self, .fromSize(abi_size)), + try dst_mcv.mem(self, .{ .size = .fromSize(abi_size) }), ), } } /// Clobbers .rcx for non-immediate shift value. fn genShiftBinOpMir( - self: *Self, + self: *CodeGen, tag: Mir.Inst.FixedTag, lhs_ty: Type, lhs_mcv: MCValue, @@ -9773,7 +9857,7 @@ fn genShiftBinOpMir( /// Clobbers .rcx for non-immediate rhs, therefore care is needed to spill .rcx upfront. /// Asserts .rcx is free. fn genShiftBinOp( - self: *Self, + self: *CodeGen, air_tag: Air.Inst.Tag, maybe_inst: ?Air.Inst.Index, lhs_mcv: MCValue, @@ -9832,7 +9916,7 @@ fn genShiftBinOp( /// Clobbers .rax and .rdx therefore care is needed to spill .rax and .rdx upfront. /// Asserts .rax and .rdx are free. fn genMulDivBinOp( - self: *Self, + self: *CodeGen, tag: Air.Inst.Tag, maybe_inst: ?Air.Inst.Index, dst_ty: Type, @@ -9891,27 +9975,27 @@ fn genMulDivBinOp( defer self.register_manager.unlockReg(tmp_lock); if (mat_lhs_mcv.isMemory()) - try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .qword)) + try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .{ .size = .qword })) else try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]); if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, - try mat_rhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_rhs_mcv.register_pair[1]); try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax); if (mat_rhs_mcv.isMemory()) - try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .qword)) + try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword })) else try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg); if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, - try mat_lhs_mcv.address().offset(8).deref().mem(self, .qword), + try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_lhs_mcv.register_pair[1]); if (mat_rhs_mcv.isMemory()) - try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .qword)) + try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .{ .size = .qword })) else try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.register_pair[0]); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg); @@ -10238,7 +10322,7 @@ fn genMulDivBinOp( } fn genBinOp( - self: *Self, + self: *CodeGen, maybe_inst: ?Air.Inst.Index, air_tag: Air.Inst.Tag, lhs_air: Air.Inst.Ref, @@ -10334,7 +10418,7 @@ fn genBinOp( .{ .vp_w, .insr }, dst_reg, dst_reg, - try rhs_mcv.mem(self, .word), + try rhs_mcv.mem(self, .{ .size = .word }), .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, @@ -10359,7 +10443,7 @@ fn genBinOp( mir_tag, dst_reg, dst_reg, - try src_mcv.mem(self, .fromBitSize(float_bits)), + try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_reg, @@ -10378,7 +10462,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_reg, - try src_mcv.mem(self, 
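The genMulDivBinOp hunk above lowers a 128-bit multiply as one widening `mul` plus two `imul` cross products added into the high limb (rdx). The sketch below reproduces that limb arithmetic in plain Zig and checks it against a native u128 multiply; the helper name is invented for the example.

const std = @import("std");

fn mul128(a_lo: u64, a_hi: u64, b_lo: u64, b_hi: u64) [2]u64 {
    // Widening low product: the `mul` whose result lands in rdx:rax.
    const wide = @as(u128, a_lo) * @as(u128, b_lo);
    const lo: u64 = @truncate(wide);
    var hi: u64 = @truncate(wide >> 64);
    // The two `imul` cross products, each added into the high limb;
    // any carry out of bit 127 is discarded, matching a wrapping multiply.
    hi +%= a_lo *% b_hi;
    hi +%= a_hi *% b_lo;
    return .{ lo, hi };
}

test "limb multiply matches a native u128 multiply" {
    const a: u128 = 0x0123_4567_89ab_cdef_fedc_ba98_7654_3210;
    const b: u128 = 0x0000_0002_0000_0001_ffff_ffff_ffff_fffd;
    const a_lo: u64 = @truncate(a);
    const a_hi: u64 = @truncate(a >> 64);
    const b_lo: u64 = @truncate(b);
    const b_hi: u64 = @truncate(b >> 64);
    const limbs = mul128(a_lo, a_hi, b_lo, b_hi);
    const got = (@as(u128, limbs[1]) << 64) | limbs[0];
    try std.testing.expectEqual(a *% b, got);
}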
.fromBitSize(float_bits)), + try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }), ) else try self.asmRegisterRegister( mir_tag, dst_reg, @@ -10651,22 +10735,22 @@ fn genBinOp( try self.asmRegisterMemory( .{ ._, .cmp }, dst_regs[0], - try src_mcv.mem(self, .qword), + try src_mcv.mem(self, .{ .size = .qword }), ); try self.asmRegisterMemory( .{ ._, .sbb }, tmp_reg, - try src_mcv.address().offset(8).deref().mem(self, .qword), + try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ); try self.asmCmovccRegisterMemory( cc, dst_regs[0], - try src_mcv.mem(self, .qword), + try src_mcv.mem(self, .{ .size = .qword }), ); try self.asmCmovccRegisterMemory( cc, dst_regs[1], - try src_mcv.address().offset(8).deref().mem(self, .qword), + try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ); } else { try self.asmRegisterRegister( @@ -10829,7 +10913,7 @@ fn genBinOp( .{ .vp_w, .insr }, dst_reg, dst_reg, - try src_mcv.mem(self, .word), + try src_mcv.mem(self, .{ .size = .word }), .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, @@ -11275,7 +11359,7 @@ fn genBinOp( .{ .vp_w, .insr }, dst_reg, dst_reg, - try src_mcv.mem(self, .word), + try src_mcv.mem(self, .{ .size = .word }), .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, @@ -11321,7 +11405,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ .vp_d, .insr }, dst_reg, - try src_mcv.mem(self, .dword), + try src_mcv.mem(self, .{ .size = .dword }), .u(1), ) else try self.asmRegisterRegisterRegister( .{ .v_ps, .unpckl }, @@ -11373,7 +11457,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, tmp_reg, - try src_mcv.mem(self, .qword), + try src_mcv.mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ .v_ps, .cvtph2 }, tmp_reg, @@ -11416,7 +11500,7 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, tmp_reg, - try src_mcv.mem(self, .xword), + try src_mcv.mem(self, .{ .size = .xword }), ) else try self.asmRegisterRegister( .{ .v_ps, .cvtph2 }, tmp_reg, @@ -11579,10 +11663,10 @@ fn genBinOp( mir_tag, dst_reg, lhs_reg, - try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(zcu)) { + try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { else => .fromSize(abi_size), .vector => .fromBitSize(dst_reg.bitSize()), - }), + } }), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_reg, @@ -11597,10 +11681,10 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_reg, - try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(zcu)) { + try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { else => .fromSize(abi_size), .vector => .fromBitSize(dst_reg.bitSize()), - }), + } }), ) else try self.asmRegisterRegister( mir_tag, dst_reg, @@ -11625,10 +11709,10 @@ fn genBinOp( mir_tag, dst_reg, lhs_reg, - try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(zcu)) { + try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { else => .fromSize(abi_size), .vector => .fromBitSize(dst_reg.bitSize()), - }), + } }), imm, ) else try self.asmRegisterRegisterRegisterImmediate( mir_tag, @@ -11645,10 +11729,10 @@ fn genBinOp( if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( mir_tag, dst_reg, - try src_mcv.mem(self, switch (lhs_ty.zigTypeTag(zcu)) { + try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { else => .fromSize(abi_size), .vector => .fromBitSize(dst_reg.bitSize()), - }), + } }), imm, ) else try 
self.asmRegisterRegisterImmediate( mir_tag, @@ -11854,7 +11938,7 @@ fn genBinOp( const unsigned_ty = try lhs_ty.toUnsigned(pt); const not_mcv = try self.genTypedValue(try unsigned_ty.maxInt(pt, unsigned_ty)); const not_mem: Memory = if (not_mcv.isMemory()) - try not_mcv.mem(self, .fromSize(abi_size)) + try not_mcv.mem(self, .{ .size = .fromSize(abi_size) }) else .{ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()), @@ -11915,7 +11999,7 @@ fn genBinOp( } fn genBinOpMir( - self: *Self, + self: *CodeGen, mir_tag: Mir.Inst.FixedTag, ty: Type, dst_mcv: MCValue, @@ -12341,7 +12425,7 @@ fn genBinOpMir( /// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. /// Does not support byte-size operands. -fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { +fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { const pt = self.pt; const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu)); try self.spillEflagsIfOccupied(); @@ -12468,7 +12552,7 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M } } -fn airArg(self: *Self, inst: Air.Inst.Index) !void { +fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; // skip zero-bit arguments as they don't have a corresponding arg instruction @@ -12522,7 +12606,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, false); if (regs_frame_addr.regs > 0) try self.asmMemoryRegister( .{ ._, .mov }, - try dst_mcv.mem(self, .byte), + try dst_mcv.mem(self, .{ .size = .byte }), prev_reg.to8(), ); try self.genInlineMemset( @@ -12554,7 +12638,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { const unset = try self.asmJccReloc(.e, undefined); try self.asmMemoryRegister( .{ ._s, .bt }, - try dst_mcv.mem(self, .dword), + try dst_mcv.mem(self, .{ .size = .dword }), index_reg.to32(), ); self.performReloc(unset); @@ -12578,7 +12662,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ .none, .none, .none }); } -fn airDbgArg(self: *Self, inst: Air.Inst.Index) !void { +fn airDbgArg(self: *CodeGen, inst: Air.Inst.Index) !void { // skip zero-bit arguments as they don't have a corresponding arg instruction var arg_index = self.arg_index; while (self.args[arg_index] == .none) arg_index += 1; @@ -12594,13 +12678,13 @@ fn airDbgArg(self: *Self, inst: Air.Inst.Index) !void { } else try self.airDbgVarArgs(); } -fn airDbgVarArgs(self: *Self) !void { +fn airDbgVarArgs(self: *CodeGen) !void { if (self.pt.zcu.typeToFunc(self.fn_type).?.is_var_args) try self.asmPseudo(.pseudo_dbg_var_args_none); } fn genLocalDebugInfo( - self: *Self, + self: *CodeGen, inst: Air.Inst.Index, mcv: MCValue, ) !void { @@ -12656,19 +12740,19 @@ fn genLocalDebugInfo( } } -fn airRetAddr(self: *Self, inst: Air.Inst.Index) !void { +fn airRetAddr(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); try self.genCopy(Type.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } }, .{}); return self.finishAir(inst, dst_mcv, .{ .none, .none, .none }); } -fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void { +fn airFrameAddress(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); try self.genCopy(Type.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } }, .{}); return 
self.finishAir(inst, dst_mcv, .{ .none, .none, .none }); } -fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void { +fn airCall(self: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void { if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{}); const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; @@ -12702,7 +12786,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier return self.finishAirResult(inst, result); } -fn genCall(self: *Self, info: union(enum) { +fn genCall(self: *CodeGen, info: union(enum) { air: Air.Inst.Ref, lib: struct { return_type: InternPool.Index, @@ -12808,7 +12892,7 @@ fn genCall(self: *Self, info: union(enum) { const index_lock = self.register_manager.lockRegAssumeUnused(index_reg); defer self.register_manager.unlockReg(index_lock); - const src_mem: Memory = if (src_arg.isMemory()) try src_arg.mem(self, .dword) else .{ + const src_mem: Memory = if (src_arg.isMemory()) try src_arg.mem(self, .{ .size = .dword }) else .{ .base = .{ .reg = try self.copyToTmpRegister( Type.usize, switch (src_arg) { @@ -12900,7 +12984,7 @@ fn genCall(self: *Self, info: union(enum) { .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, }, .{}), .elementwise_regs_then_frame => |regs_frame_addr| { - const src_mem: Memory = if (src_arg.isMemory()) try src_arg.mem(self, .dword) else .{ + const src_mem: Memory = if (src_arg.isMemory()) try src_arg.mem(self, .{ .size = .dword }) else .{ .base = .{ .reg = try self.copyToTmpRegister( Type.usize, switch (src_arg) { @@ -13006,7 +13090,7 @@ fn genCall(self: *Self, info: union(enum) { return call_info.return_value.short; } -fn airRet(self: *Self, inst: Air.Inst.Index, safety: bool) !void { +fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { const pt = self.pt; const zcu = pt.zcu; const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; @@ -13042,7 +13126,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index, safety: bool) !void { try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc); } -fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { +fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const ptr = try self.resolveInst(un_op); @@ -13062,7 +13146,7 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc); } -fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void { +fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -13145,7 +13229,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void if (lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ ._, .mov }, temp_lhs_reg.to8(), - try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .byte), + try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }), ) else { try self.genSetReg(temp_lhs_reg, opt_ty, lhs_mcv, .{}); try self.asmRegisterImmediate( @@ -13158,7 +13242,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void const payload_compare = payload_compare: { if (rhs_mcv.isMemory()) { const rhs_mem = - try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .byte); + try 
rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }); try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8()); const payload_compare = try self.asmJccReloc(.nz, undefined); try self.asmRegisterMemory(.{ ._, .cmp }, temp_lhs_reg.to8(), rhs_mem); @@ -13459,7 +13543,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void .{ .vp_w, .insr }, tmp1_reg, dst_reg.to128(), - try src_mcv.mem(self, .word), + try src_mcv.mem(self, .{ .size = .word }), .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, @@ -13505,7 +13589,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: std.math.CompareOperator) !void return self.finishAir(inst, .{ .eflags = result }, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void { +fn airCmpVector(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data; const dst_mcv = try self.genBinOp( @@ -13517,7 +13601,7 @@ fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); } -fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { +fn airCmpLtErrorsLen(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; @@ -13550,7 +13634,7 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, .{ .eflags = .b }, .{ un_op, .none, .none }); } -fn airTry(self: *Self, inst: Air.Inst.Index) !void { +fn airTry(self: *CodeGen, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; const extra = self.air.extraData(Air.Try, pl_op.payload); const body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]); @@ -13559,7 +13643,7 @@ fn airTry(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ .none, .none, .none }); } -fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airTryPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.TryPtr, ty_pl.payload); const body: []const Air.Inst.Index = @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len]); @@ -13569,7 +13653,7 @@ fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void { } fn genTry( - self: *Self, + self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, body: []const Air.Inst.Index, @@ -13615,7 +13699,7 @@ fn genTry( return result; } -fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { +fn airDbgStmt(self: *CodeGen, inst: Air.Inst.Index) !void { const dbg_stmt = self.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt; _ = try self.addInst(.{ .tag = .pseudo, @@ -13627,14 +13711,14 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { }); } -fn airDbgEmptyStmt(self: *Self) !void { +fn airDbgEmptyStmt(self: *CodeGen) !void { if (self.mir_instructions.len > 0 and self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] == .pseudo_dbg_line_stmt_line_column) self.mir_instructions.items(.ops)[self.mir_instructions.len - 1] = .pseudo_dbg_line_line_column; try self.asmOpOnly(.{ ._, .nop }); } -fn airDbgInlineBlock(self: *Self, inst: Air.Inst.Index) !void { +fn airDbgInlineBlock(self: *CodeGen, inst: 
Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload); const old_inline_func = self.inline_func; @@ -13653,13 +13737,13 @@ fn airDbgInlineBlock(self: *Self, inst: Air.Inst.Index) !void { }); } -fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { +fn airDbgVar(self: *CodeGen, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; try self.genLocalDebugInfo(inst, try self.resolveInst(pl_op.operand)); return self.finishAir(inst, .unreach, .{ pl_op.operand, .none, .none }); } -fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !Mir.Inst.Index { +fn genCondBrMir(self: *CodeGen, ty: Type, mcv: MCValue) !Mir.Inst.Index { const pt = self.pt; const abi_size = ty.abiSize(pt.zcu); switch (mcv) { @@ -13686,7 +13770,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !Mir.Inst.Index { } } -fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { +fn airCondBr(self: *CodeGen, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; const cond = try self.resolveInst(pl_op.operand); const cond_ty = self.typeOf(pl_op.operand); @@ -13731,7 +13815,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // We already took care of pl_op.operand earlier, so there's nothing left to do. } -fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue { +fn isNull(self: *CodeGen, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue { const pt = self.pt; const zcu = pt.zcu; switch (opt_mcv) { @@ -13862,7 +13946,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC } } -fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue { +fn isNullPtr(self: *CodeGen, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue { const pt = self.pt; const zcu = pt.zcu; const opt_ty = ptr_ty.childType(zcu); @@ -13899,7 +13983,7 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) return .{ .eflags = .e }; } -fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue { +fn isErr(self: *CodeGen, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue { const pt = self.pt; const zcu = pt.zcu; const err_ty = eu_ty.errorUnionSet(zcu); @@ -13948,7 +14032,7 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) return MCValue{ .eflags = .a }; } -fn isErrPtr(self: *Self, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue { +fn isErrPtr(self: *CodeGen, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue { const pt = self.pt; const zcu = pt.zcu; const eu_ty = ptr_ty.childType(zcu); @@ -13981,7 +14065,7 @@ fn isErrPtr(self: *Self, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCV return MCValue{ .eflags = .a }; } -fn isNonErr(self: *Self, inst: Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue { +fn isNonErr(self: *CodeGen, inst: Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MCValue { const is_err_res = try self.isErr(inst, eu_ty, eu_mcv); switch (is_err_res) { .eflags => |cc| { @@ -13996,7 +14080,7 @@ fn isNonErr(self: *Self, inst: Air.Inst.Index, eu_ty: Type, eu_mcv: MCValue) !MC } } -fn isNonErrPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue { +fn isNonErrPtr(self: *CodeGen, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) 
!MCValue { const is_err_res = try self.isErrPtr(inst, ptr_ty, ptr_mcv); switch (is_err_res) { .eflags => |cc| { @@ -14011,7 +14095,7 @@ fn isNonErrPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue } } -fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { +fn airIsNull(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try self.resolveInst(un_op); const ty = self.typeOf(un_op); @@ -14019,7 +14103,7 @@ fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airIsNullPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try self.resolveInst(un_op); const ty = self.typeOf(un_op); @@ -14027,7 +14111,7 @@ fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { +fn airIsNonNull(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try self.resolveInst(un_op); const ty = self.typeOf(un_op); @@ -14039,7 +14123,7 @@ fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airIsNonNullPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try self.resolveInst(un_op); const ty = self.typeOf(un_op); @@ -14050,7 +14134,7 @@ fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { +fn airIsErr(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try self.resolveInst(un_op); const ty = self.typeOf(un_op); @@ -14058,7 +14142,7 @@ fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airIsErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try self.resolveInst(un_op); const ty = self.typeOf(un_op); @@ -14066,7 +14150,7 @@ fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { +fn airIsNonErr(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try self.resolveInst(un_op); const ty = self.typeOf(un_op); @@ -14074,7 +14158,7 @@ fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airIsNonErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const operand = try self.resolveInst(un_op); const ty = self.typeOf(un_op); @@ -14082,7 +14166,7 @@ fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { return 
self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airLoop(self: *Self, inst: Air.Inst.Index) !void { +fn airLoop(self: *CodeGen, inst: Air.Inst.Index) !void { // A loop is a setup to be able to jump back to the beginning. const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const loop = self.air.extraData(Air.Block, ty_pl.payload); @@ -14100,7 +14184,7 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { try self.genBodyBlock(body); } -fn airBlock(self: *Self, inst: Air.Inst.Index) !void { +fn airBlock(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.Block, ty_pl.payload); try self.asmPseudo(.pseudo_dbg_enter_block_none); @@ -14108,7 +14192,7 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void { try self.asmPseudo(.pseudo_dbg_leave_block_none); } -fn lowerBlock(self: *Self, inst: Air.Inst.Index, body: []const Air.Inst.Index) !void { +fn lowerBlock(self: *CodeGen, inst: Air.Inst.Index, body: []const Air.Inst.Index) !void { // A block is a setup to be able to jump to the end. const inst_tracking_i = self.inst_tracking.count(); self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(.unreach)); @@ -14137,7 +14221,7 @@ fn lowerBlock(self: *Self, inst: Air.Inst.Index, body: []const Air.Inst.Index) ! self.getValueIfFree(tracking.short, inst); } -fn lowerSwitchBr(self: *Self, inst: Air.Inst.Index, switch_br: Air.UnwrappedSwitch, condition: MCValue) !void { +fn lowerSwitchBr(self: *CodeGen, inst: Air.Inst.Index, switch_br: Air.UnwrappedSwitch, condition: MCValue) !void { const zcu = self.pt.zcu; const condition_ty = self.typeOf(switch_br.operand); const liveness = try self.liveness.getSwitchBr(self.gpa, inst, switch_br.cases_len + 1); @@ -14255,7 +14339,7 @@ fn lowerSwitchBr(self: *Self, inst: Air.Inst.Index, switch_br: Air.UnwrappedSwit } } -fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { +fn airSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void { const switch_br = self.air.unwrapSwitch(inst); const condition = try self.resolveInst(switch_br.operand); @@ -14271,7 +14355,7 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { // We already took care of pl_op.operand earlier, so there's nothing left to do } -fn airLoopSwitchBr(self: *Self, inst: Air.Inst.Index) !void { +fn airLoopSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void { const switch_br = self.air.unwrapSwitch(inst); const condition = try self.resolveInst(switch_br.operand); @@ -14309,7 +14393,7 @@ fn airLoopSwitchBr(self: *Self, inst: Air.Inst.Index) !void { try self.processDeath(inst); } -fn airSwitchDispatch(self: *Self, inst: Air.Inst.Index) !void { +fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void { const br = self.air.instructions.items(.data)[@intFromEnum(inst)].br; const block_ty = self.typeOfIndex(br.block_inst); @@ -14354,7 +14438,7 @@ fn airSwitchDispatch(self: *Self, inst: Air.Inst.Index) !void { try self.freeValue(block_tracking.short); } -fn performReloc(self: *Self, reloc: Mir.Inst.Index) void { +fn performReloc(self: *CodeGen, reloc: Mir.Inst.Index) void { const next_inst: u32 = @intCast(self.mir_instructions.len); switch (self.mir_instructions.items(.tag)[reloc]) { .j, .jmp => {}, @@ -14367,7 +14451,7 @@ fn performReloc(self: *Self, reloc: Mir.Inst.Index) void { self.mir_instructions.items(.data)[reloc].inst.inst = next_inst; } -fn airBr(self: *Self, inst: Air.Inst.Index) !void { +fn airBr(self: *CodeGen, inst: Air.Inst.Index) !void 
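airLoop and airBlock above describe the two jump shapes the backend tracks: a block is a forward jump target reached by `br`, a loop is a backward one reached by `repeat`. In source terms those correspond to a labeled block with `break` and a loop that keeps re-entering its body, as in this small reference:

const std = @import("std");

test "block breaks forward, loop jumps back" {
    // block: forward jump to the end, carrying a result (the AIR `br`).
    const limit = blk: {
        break :blk 3;
    };
    // loop: backward jump to the start until the condition fails (the AIR `repeat`).
    var i: u32 = 0;
    while (i < limit) : (i += 1) {}
    try std.testing.expectEqual(@as(u32, 3), i);
}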
{ const zcu = self.pt.zcu; const br = self.air.instructions.items(.data)[@intFromEnum(inst)].br; @@ -14426,7 +14510,7 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void { try self.freeValue(block_tracking.short); } -fn airRepeat(self: *Self, inst: Air.Inst.Index) !void { +fn airRepeat(self: *CodeGen, inst: Air.Inst.Index) !void { const loop_inst = self.air.instructions.items(.data)[@intFromEnum(inst)].repeat.loop_inst; const repeat_info = self.loops.get(loop_inst).?; try self.restoreState(repeat_info.state, &.{}, .{ @@ -14438,7 +14522,7 @@ fn airRepeat(self: *Self, inst: Air.Inst.Index) !void { _ = try self.asmJmpReloc(repeat_info.target); } -fn airAsm(self: *Self, inst: Air.Inst.Index) !void { +fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -15110,7 +15194,7 @@ const MoveStrategy = union(enum) { extract: Mir.Inst.FixedTag, }; - pub fn read(strat: MoveStrategy, self: *Self, dst_reg: Register, src_mem: Memory) !void { + pub fn read(strat: MoveStrategy, self: *CodeGen, dst_reg: Register, src_mem: Memory) !void { switch (strat) { .move => |tag| try self.asmRegisterMemory(tag, switch (tag[1]) { else => dst_reg, @@ -15136,7 +15220,7 @@ const MoveStrategy = union(enum) { ), } } - pub fn write(strat: MoveStrategy, self: *Self, dst_mem: Memory, src_reg: Register) !void { + pub fn write(strat: MoveStrategy, self: *CodeGen, dst_mem: Memory, src_reg: Register) !void { switch (strat) { .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_reg), .x87_load_store => { @@ -15152,7 +15236,7 @@ const MoveStrategy = union(enum) { } } }; -fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !MoveStrategy { +fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) !MoveStrategy { const pt = self.pt; const zcu = pt.zcu; switch (class) { @@ -15383,7 +15467,7 @@ const CopyOptions = struct { safety: bool = false, }; -fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: CopyOptions) InnerError!void { +fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: CopyOptions) InnerError!void { const pt = self.pt; const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; @@ -15496,7 +15580,7 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: Copy } fn genSetReg( - self: *Self, + self: *CodeGen, dst_reg: Register, ty: Type, src_mcv: MCValue, @@ -15781,7 +15865,7 @@ fn genSetReg( } fn genSetMem( - self: *Self, + self: *CodeGen, base: Memory.Base, disp: i32, ty: Type, @@ -16012,7 +16096,7 @@ fn genSetMem( } } -fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void { +fn genInlineMemcpy(self: *CodeGen, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void { try self.spillRegisters(&.{ .rsi, .rdi, .rcx }); try self.genSetReg(.rsi, Type.usize, src_ptr, .{}); try self.genSetReg(.rdi, Type.usize, dst_ptr, .{}); @@ -16021,7 +16105,7 @@ fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue } fn genInlineMemset( - self: *Self, + self: *CodeGen, dst_ptr: MCValue, value: MCValue, len: MCValue, @@ -16035,7 +16119,7 @@ fn genInlineMemset( } fn genExternSymbolRef( - self: *Self, + self: *CodeGen, comptime tag: Mir.Inst.Tag, lib: ?[]const u8, callee: []const u8, @@ -16061,7 +16145,7 @@ fn genExternSymbolRef( } fn genLazySymbolRef( - self: *Self, + self: *CodeGen, comptime 
tag: Mir.Inst.Tag, reg: Register, lazy_sym: link.File.LazySymbol, @@ -16159,7 +16243,7 @@ fn genLazySymbolRef( } } -fn airIntFromPtr(self: *Self, inst: Air.Inst.Index) !void { +fn airIntFromPtr(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const result = result: { // TODO: handle case where the operand is a slice not a raw pointer @@ -16174,7 +16258,7 @@ fn airIntFromPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { +fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -16239,7 +16323,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { +fn airArrayToSlice(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -16264,7 +16348,7 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airFloatFromInt(self: *Self, inst: Air.Inst.Index) !void { +fn airFloatFromInt(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -16344,7 +16428,7 @@ fn airFloatFromInt(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airIntFromFloat(self: *Self, inst: Air.Inst.Index) !void { +fn airIntFromFloat(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -16416,7 +16500,7 @@ fn airIntFromFloat(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { +fn airCmpxchg(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data; @@ -16463,7 +16547,7 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { const ptr_mcv = try self.resolveInst(extra.ptr); const mem_size: Memory.Size = .fromSize(val_abi_size); const ptr_mem: Memory = switch (ptr_mcv) { - .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size), + .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, .{ .size = mem_size }), else => .{ .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }, .mod = .{ .rm = .{ .size = mem_size } }, @@ -16504,7 +16588,7 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { } fn atomicOp( - self: *Self, + self: *CodeGen, ptr_mcv: MCValue, val_mcv: MCValue, ptr_ty: Type, @@ -16530,7 +16614,7 @@ fn atomicOp( const val_abi_size: u32 = @intCast(val_ty.abiSize(zcu)); const mem_size: Memory.Size = .fromSize(val_abi_size); const ptr_mem: Memory = switch (ptr_mcv) { - .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, mem_size), + .immediate, .register, .register_offset, .lea_frame => try ptr_mcv.deref().mem(self, .{ 
.size = mem_size }), else => .{ .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }, .mod = .{ .rm = .{ .size = mem_size } }, @@ -16649,7 +16733,7 @@ fn atomicOp( mir_tag, sse_reg.to128(), sse_reg.to128(), - try val_mcv.mem(self, self.memSize(val_ty)), + try val_mcv.mem(self, .{ .size = self.memSize(val_ty) }), ) else try self.asmRegisterRegisterRegister( mir_tag, sse_reg.to128(), @@ -16662,7 +16746,7 @@ fn atomicOp( ._ss, ._sd => if (val_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, sse_reg.to128(), - try val_mcv.mem(self, self.memSize(val_ty)), + try val_mcv.mem(self, .{ .size = self.memSize(val_ty) }), ) else try self.asmRegisterRegister( mir_tag, sse_reg.to128(), @@ -16717,7 +16801,7 @@ fn atomicOp( try self.asmCmovccRegisterMemory( cc, registerAlias(tmp_reg, cmov_abi_size), - try val_mcv.mem(self, .fromSize(cmov_abi_size)), + try val_mcv.mem(self, .{ .size = .fromSize(cmov_abi_size) }), ); }, else => { @@ -16773,8 +16857,8 @@ fn atomicOp( .reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()), } }, }; - const val_lo_mem = try val_mem_mcv.mem(self, .qword); - const val_hi_mem = try val_mem_mcv.address().offset(8).deref().mem(self, .qword); + const val_lo_mem = try val_mem_mcv.mem(self, .{ .size = .qword }); + const val_hi_mem = try val_mem_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }); if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax); try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx); @@ -16862,7 +16946,7 @@ fn atomicOp( } } -fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void { +fn airAtomicRmw(self: *CodeGen, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; const extra = self.air.extraData(Air.AtomicRmw, pl_op.payload).data; @@ -16883,7 +16967,7 @@ fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ pl_op.operand, extra.operand, .none }); } -fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void { +fn airAtomicLoad(self: *CodeGen, inst: Air.Inst.Index) !void { const atomic_load = self.air.instructions.items(.data)[@intFromEnum(inst)].atomic_load; const ptr_ty = self.typeOf(atomic_load.ptr); @@ -16904,7 +16988,7 @@ fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ atomic_load.ptr, .none, .none }); } -fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void { +fn airAtomicStore(self: *CodeGen, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void { const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; const ptr_ty = self.typeOf(bin_op.lhs); @@ -16917,7 +17001,7 @@ fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOr return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { +fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -17059,7 +17143,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { +fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const bin_op = 
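atomicOp above handles RMW operations that have no single-instruction encoding by computing the new value into a temporary (cmp plus cmovcc in the min/max case) and retrying with a lock cmpxchg until the stored value was not changed underneath. The same strategy expressed with Zig's atomic builtins, as a behavioral reference only and not the backend's exact instruction sequence:

const std = @import("std");

fn atomicMin(ptr: *u32, operand: u32) u32 {
    var old = @atomicLoad(u32, ptr, .monotonic);
    while (true) {
        // The temporary the MIR materializes before the cmpxchg.
        const new = @min(old, operand);
        // Retry until no other thread changed *ptr between load and store.
        old = @cmpxchgWeak(u32, ptr, old, new, .monotonic, .monotonic) orelse return old;
    }
}

test "cas-loop min" {
    var value: u32 = 10;
    const prev = atomicMin(&value, 3);
    try std.testing.expectEqual(@as(u32, 10), prev);
    try std.testing.expectEqual(@as(u32, 3), value);
}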
self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; @@ -17107,7 +17191,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { else => try self.asmRegisterMemoryImmediate( .{ .i_, .mul }, len_reg, - try dst.address().offset(8).deref().mem(self, .qword), + try dst.address().offset(8).deref().mem(self, .{ .size = .qword }), .s(@intCast(dst_ty.childType(zcu).abiSize(zcu))), ), } @@ -17139,7 +17223,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn airTagName(self: *Self, inst: Air.Inst.Index) !void { +fn airTagName(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; @@ -17179,7 +17263,7 @@ fn airTagName(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } -fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { +fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const err_ty = self.typeOf(un_op); @@ -17281,7 +17365,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } -fn airSplat(self: *Self, inst: Air.Inst.Index) !void { +fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -17318,7 +17402,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { else => cc: { try self.asmMemoryImmediate( .{ ._, .@"test" }, - try src_mcv.mem(self, .byte), + try src_mcv.mem(self, .{ .size = .byte }), .u(1), ); break :cc .nz; @@ -17362,7 +17446,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))), - try src_mcv.mem(self, self.memSize(scalar_ty)), + try src_mcv.mem(self, .{ .size = self.memSize(scalar_ty) }), ) else { if (mir_tag[0] == .v_i128) break :avx2; try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{}); @@ -17438,7 +17522,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ss, .broadcast }, dst_reg.to128(), - try src_mcv.mem(self, .dword), + try src_mcv.mem(self, .{ .size = .dword }), ) else { const src_reg = if (src_mcv.isRegister()) src_mcv.getReg().? @@ -17475,7 +17559,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_ss, .broadcast }, dst_reg.to256(), - try src_mcv.mem(self, .dword), + try src_mcv.mem(self, .{ .size = .dword }), ) else { const src_reg = if (src_mcv.isRegister()) src_mcv.getReg().? 
@@ -17521,7 +17605,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, dst_reg.to128(), - try src_mcv.mem(self, .qword), + try src_mcv.mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, dst_reg.to128(), @@ -17546,7 +17630,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_sd, .broadcast }, dst_reg.to256(), - try src_mcv.mem(self, .qword), + try src_mcv.mem(self, .{ .size = .qword }), ) else { const src_reg = if (src_mcv.isRegister()) src_mcv.getReg().? @@ -17589,7 +17673,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemory( .{ .v_f128, .broadcast }, dst_reg.to256(), - try src_mcv.mem(self, .xword), + try src_mcv.mem(self, .{ .size = .xword }), ) else { const src_reg = if (src_mcv.isRegister()) src_mcv.getReg().? @@ -17616,7 +17700,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airSelect(self: *Self, inst: Air.Inst.Index) !void { +fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; @@ -17695,7 +17779,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { else => unreachable, }, .broadcast }, mask_alias, - if (pred_mcv.isMemory()) try pred_mcv.mem(self, .byte) else .{ + if (pred_mcv.isMemory()) try pred_mcv.mem(self, .{ .size = .byte }) else .{ .base = .{ .reg = (try self.copyToTmpRegister( Type.usize, pred_mcv.address(), @@ -17893,7 +17977,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { mir_tag, dst_alias, rhs_alias, - try lhs_mcv.mem(self, self.memSize(ty)), + try lhs_mcv.mem(self, .{ .size = self.memSize(ty) }), mask_alias, ) else try self.asmRegisterRegisterRegisterRegister( mir_tag, @@ -17908,7 +17992,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { } else if (has_blend) if (lhs_mcv.isMemory()) try self.asmRegisterMemoryRegister( mir_tag, dst_alias, - try lhs_mcv.mem(self, self.memSize(ty)), + try lhs_mcv.mem(self, .{ .size = self.memSize(ty) }), mask_alias, ) else try self.asmRegisterRegisterRegister( mir_tag, @@ -17933,7 +18017,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { if (rhs_mcv.isMemory()) try self.asmRegisterMemory( .{ mir_fixes, .andn }, mask_alias, - try rhs_mcv.mem(self, .fromSize(abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(abi_size) }), ) else try self.asmRegisterRegister( .{ mir_fixes, .andn }, mask_alias, @@ -17949,7 +18033,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs }); } -fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { +fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; @@ -18074,7 +18158,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { mir_tag, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .fromSize(max_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), ) else try self.asmRegisterRegisterRegister( mir_tag, dst_alias, @@ -18086,7 +18170,7 @@ fn 
airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemory( mir_tag, dst_alias, - try rhs_mcv.mem(self, .fromSize(max_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), ) else try self.asmRegisterRegister( mir_tag, dst_alias, @@ -18135,7 +18219,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ if (has_avx) .vp_d else .p_d, .shuf }, dst_alias, - try src_mcv.mem(self, .fromSize(max_abi_size)), + try src_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), .u(control), ) else try self.asmRegisterRegisterImmediate( .{ if (has_avx) .vp_d else .p_d, .shuf }, @@ -18192,7 +18276,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { .{ .v_ps, .shuf }, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .fromSize(max_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), .u(control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .v_ps, .shuf }, @@ -18206,7 +18290,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ ._ps, .shuf }, dst_alias, - try rhs_mcv.mem(self, .fromSize(max_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), .u(control), ) else try self.asmRegisterRegisterImmediate( .{ ._ps, .shuf }, @@ -18259,7 +18343,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { .{ .v_pd, .shuf }, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), - try rhs_mcv.mem(self, .fromSize(max_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), .u(control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .v_pd, .shuf }, @@ -18273,7 +18357,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ ._pd, .shuf }, dst_alias, - try rhs_mcv.mem(self, .fromSize(max_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), .u(control), ) else try self.asmRegisterRegisterImmediate( .{ ._pd, .shuf }, @@ -18329,7 +18413,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { .{ .vp_d, .blend }, registerAlias(dst_reg, dst_abi_size), registerAlias(lhs_reg, dst_abi_size), - try rhs_mcv.mem(self, .fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .vp_d, .blend }, @@ -18384,7 +18468,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { lhs_mcv.getReg().? else dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( .{ .vp_w, .blend }, @@ -18401,7 +18485,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( .{ .p_w, .blend }, registerAlias(dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), .u(expanded_control), ) else try self.asmRegisterRegisterImmediate( .{ .p_w, .blend }, @@ -18445,7 +18529,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { lhs_mcv.getReg().? 
else dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), .u(expanded_control), ) else try self.asmRegisterRegisterRegisterImmediate( switch (elem_abi_size) { @@ -18470,7 +18554,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { else => unreachable, }, registerAlias(dst_reg, dst_abi_size), - try rhs_mcv.mem(self, .fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), .u(expanded_control), ) else try self.asmRegisterRegisterImmediate( switch (elem_abi_size) { @@ -18560,7 +18644,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { registerAlias(lhs_mcv.getReg().?, dst_abi_size) else dst_alias, - try rhs_mcv.mem(self, .fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), select_mask_alias, ) else try self.asmRegisterRegisterRegisterRegister( mir_tag, @@ -18577,7 +18661,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryRegister( mir_tag, dst_alias, - try rhs_mcv.mem(self, .fromSize(dst_abi_size)), + try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), select_mask_alias, ) else try self.asmRegisterRegisterRegister( mir_tag, @@ -18620,7 +18704,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { if (lhs_mcv.isMemory()) try self.asmRegisterMemory( .{ mir_fixes, .andn }, mask_alias, - try lhs_mcv.mem(self, .fromSize(dst_abi_size)), + try lhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), ) else try self.asmRegisterRegister( .{ mir_fixes, .andn }, mask_alias, @@ -18757,7 +18841,7 @@ fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ extra.a, extra.b, .none }); } -fn airReduce(self: *Self, inst: Air.Inst.Index) !void { +fn airReduce(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const reduce = self.air.instructions.items(.data)[@intFromEnum(inst)].reduce; @@ -18776,7 +18860,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { .Or => { if (operand_mcv.isMemory()) try self.asmMemoryImmediate( .{ ._, .@"test" }, - try operand_mcv.mem(self, .fromSize(abi_size)), + try operand_mcv.mem(self, .{ .size = .fromSize(abi_size) }), .u(mask), ) else { const operand_reg = registerAlias(if (operand_mcv.isRegister()) @@ -18815,7 +18899,7 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ reduce.operand, .none, .none }); } -fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { +fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const result_ty = self.typeOfIndex(inst); @@ -19007,7 +19091,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { return self.finishAirResult(inst, result); } -fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void { +fn airUnionInit(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -19053,12 +19137,12 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ extra.init, .none, .none }); } -fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { +fn airPrefetch(self: *CodeGen, inst: Air.Inst.Index) !void { const prefetch = self.air.instructions.items(.data)[@intFromEnum(inst)].prefetch; return self.finishAir(inst, .unreach, .{ prefetch.ptr, .none, .none }); } -fn airMulAdd(self: *Self, inst: 
Air.Inst.Index) !void { +fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; @@ -19219,14 +19303,14 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { mir_tag, mop1_reg, mop2_reg, - try mops[2].mem(self, .fromSize(abi_size)), + try mops[2].mem(self, .{ .size = .fromSize(abi_size) }), ); break :result mops[0]; }; return self.finishAir(inst, result, ops); } -fn airVaStart(self: *Self, inst: Air.Inst.Index) !void { +fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const va_list_ty = self.air.instructions.items(.data)[@intFromEnum(inst)].ty; @@ -19284,7 +19368,7 @@ fn airVaStart(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ .none, .none, .none }); } -fn airVaArg(self: *Self, inst: Air.Inst.Index) !void { +fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; @@ -19451,7 +19535,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void { .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, - try promote_mcv.mem(self, .qword), + try promote_mcv.mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegisterRegister( .{ .v_ss, .cvtsd2 }, dst_reg, @@ -19463,7 +19547,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void { ) else if (promote_mcv.isMemory()) try self.asmRegisterMemory( .{ ._ss, .cvtsd2 }, dst_reg, - try promote_mcv.mem(self, .qword), + try promote_mcv.mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister( .{ ._ss, .cvtsd2 }, dst_reg, @@ -19480,7 +19564,7 @@ fn airVaArg(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airVaCopy(self: *Self, inst: Air.Inst.Index) !void { +fn airVaCopy(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const ptr_va_list_ty = self.typeOf(ty_op.operand); @@ -19489,12 +19573,12 @@ fn airVaCopy(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } -fn airVaEnd(self: *Self, inst: Air.Inst.Index) !void { +fn airVaEnd(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; return self.finishAir(inst, .unreach, .{ un_op, .none, .none }); } -fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { +fn resolveInst(self: *CodeGen, ref: Air.Inst.Ref) InnerError!MCValue { const zcu = self.pt.zcu; const ty = self.typeOf(ref); @@ -19543,7 +19627,7 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { } } -fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) *InstTracking { +fn getResolvedInstValue(self: *CodeGen, inst: Air.Inst.Index) *InstTracking { const tracking = self.inst_tracking.getPtr(inst).?; return switch (tracking.short) { .none, .unreach, .dead => unreachable, @@ -19556,7 +19640,7 @@ fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) *InstTracking { /// A potential opportunity for future optimization here would be keeping track /// of the fact that the instruction is available both as an immediate /// and as a register. 
-fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCValue { +fn limitImmediateType(self: *CodeGen, operand: Air.Inst.Ref, comptime T: type) !MCValue { const mcv = try self.resolveInst(operand); const ti = @typeInfo(T).int; switch (mcv) { @@ -19572,7 +19656,7 @@ fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCV return mcv; } -fn genTypedValue(self: *Self, val: Value) InnerError!MCValue { +fn genTypedValue(self: *CodeGen, val: Value) InnerError!MCValue { const pt = self.pt; return switch (try codegen.genTypedValue(self.bin_file, pt, self.src_loc, val, self.target.*)) { .mcv => |mcv| switch (mcv) { @@ -19599,7 +19683,7 @@ const CallMCValues = struct { gp_count: u32, fp_count: u32, - fn deinit(self: *CallMCValues, func: *Self) void { + fn deinit(self: *CallMCValues, func: *CodeGen) void { func.gpa.free(self.args); self.* = undefined; } @@ -19607,7 +19691,7 @@ const CallMCValues = struct { /// Caller must call `CallMCValues.deinit`. fn resolveCallingConventionValues( - self: *Self, + self: *CodeGen, fn_info: InternPool.Key.FuncType, var_args: []const Type, stack_frame_base: FrameIndex, @@ -19889,7 +19973,7 @@ fn resolveCallingConventionValues( return result; } -fn fail(self: *Self, comptime format: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } { +fn fail(self: *CodeGen, comptime format: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } { @branchHint(.cold); const zcu = self.pt.zcu; switch (self.owner) { @@ -19899,7 +19983,7 @@ fn fail(self: *Self, comptime format: []const u8, args: anytype) error{ OutOfMem return error.CodegenFail; } -fn failMsg(self: *Self, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } { +fn failMsg(self: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } { @branchHint(.cold); const zcu = self.pt.zcu; switch (self.owner) { @@ -19960,7 +20044,7 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { }; } -fn memSize(self: *Self, ty: Type) Memory.Size { +fn memSize(self: *CodeGen, ty: Type) Memory.Size { const zcu = self.pt.zcu; return switch (ty.zigTypeTag(zcu)) { .float => .fromBitSize(ty.floatBits(self.target.*)), @@ -19968,7 +20052,7 @@ fn memSize(self: *Self, ty: Type) Memory.Size { }; } -fn splitType(self: *Self, ty: Type) ![2]Type { +fn splitType(self: *CodeGen, ty: Type) ![2]Type { const pt = self.pt; const zcu = pt.zcu; const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none); @@ -19998,7 +20082,7 @@ fn splitType(self: *Self, ty: Type) ![2]Type { /// Truncates the value in the register in place. /// Clobbers any remaining bits. 
-fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { +fn truncateRegister(self: *CodeGen, ty: Type, reg: Register) !void { const pt = self.pt; const zcu = pt.zcu; const int_info = if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else std.builtin.Type.Int{ @@ -20046,7 +20130,7 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { } } -fn regBitSize(self: *Self, ty: Type) u64 { +fn regBitSize(self: *CodeGen, ty: Type) u64 { const zcu = self.pt.zcu; const abi_size = ty.abiSize(zcu); return switch (ty.zigTypeTag(zcu)) { @@ -20065,27 +20149,27 @@ fn regBitSize(self: *Self, ty: Type) u64 { }; } -fn regExtraBits(self: *Self, ty: Type) u64 { +fn regExtraBits(self: *CodeGen, ty: Type) u64 { return self.regBitSize(ty) - ty.bitSize(self.pt.zcu); } -fn hasFeature(self: *Self, feature: std.Target.x86.Feature) bool { +fn hasFeature(self: *CodeGen, feature: std.Target.x86.Feature) bool { return std.Target.x86.featureSetHas(self.target.cpu.features, feature); } -fn hasAnyFeatures(self: *Self, features: anytype) bool { +fn hasAnyFeatures(self: *CodeGen, features: anytype) bool { return std.Target.x86.featureSetHasAny(self.target.cpu.features, features); } -fn hasAllFeatures(self: *Self, features: anytype) bool { +fn hasAllFeatures(self: *CodeGen, features: anytype) bool { return std.Target.x86.featureSetHasAll(self.target.cpu.features, features); } -fn typeOf(self: *Self, inst: Air.Inst.Ref) Type { +fn typeOf(self: *CodeGen, inst: Air.Inst.Ref) Type { const pt = self.pt; const zcu = pt.zcu; return self.air.typeOf(inst, &zcu.intern_pool); } -fn typeOfIndex(self: *Self, inst: Air.Inst.Index) Type { +fn typeOfIndex(self: *CodeGen, inst: Air.Inst.Index) Type { const pt = self.pt; const zcu = pt.zcu; const temp: Temp = .{ .index = inst }; @@ -20118,7 +20202,7 @@ fn floatCompilerRtAbiName(float_bits: u32) u8 { }; } -fn floatCompilerRtAbiType(self: *Self, ty: Type, other_ty: Type) Type { +fn floatCompilerRtAbiType(self: *CodeGen, ty: Type, other_ty: Type) Type { if (ty.toIntern() == .f16_type and (other_ty.toIntern() == .f32_type or other_ty.toIntern() == .f64_type) and self.target.isDarwin()) return Type.u16; @@ -20145,7 +20229,7 @@ fn floatLibcAbiSuffix(ty: Type) []const u8 { }; } -fn promoteInt(self: *Self, ty: Type) Type { +fn promoteInt(self: *CodeGen, ty: Type) Type { const pt = self.pt; const zcu = pt.zcu; const int_info: InternPool.Key.IntType = switch (ty.toIntern()) { @@ -20165,7 +20249,7 @@ fn promoteInt(self: *Self, ty: Type) Type { return ty; } -fn promoteVarArg(self: *Self, ty: Type) Type { +fn promoteVarArg(self: *CodeGen, ty: Type) Type { if (!ty.isRuntimeFloat()) return self.promoteInt(ty); switch (ty.floatBits(self.target.*)) { 32, 64 => return Type.f64, @@ -20181,7 +20265,7 @@ fn promoteVarArg(self: *Self, ty: Type) Type { const Temp = struct { index: Air.Inst.Index, - fn unwrap(temp: Temp, self: *Self) union(enum) { + fn unwrap(temp: Temp, cg: *CodeGen) union(enum) { ref: Air.Inst.Ref, temp: Index, } { @@ -20189,38 +20273,69 @@ const Temp = struct { .ref => |ref| return .{ .ref = ref }, .target => |target_index| { const temp_index: Index = @enumFromInt(target_index); - assert(temp_index.isValid(self)); + assert(temp_index.isValid(cg)); return .{ .temp = temp_index }; }, } } - fn typeOf(temp: Temp, self: *Self) Type { - return switch (temp.unwrap(self)) { - .ref => |ref| self.typeOf(ref), - .temp => |temp_index| temp_index.typeOf(self), + fn typeOf(temp: Temp, cg: *CodeGen) Type { + return switch (temp.unwrap(cg)) { + .ref => |ref| cg.typeOf(ref), + .temp => |temp_index| 
temp_index.typeOf(cg), }; } - fn isMut(temp: Temp, self: *Self) bool { - return temp.unwrap(self) == .temp; + fn isMut(temp: Temp, cg: *CodeGen) bool { + return switch (temp.unwrap(cg)) { + .ref => false, + .temp => |temp_index| switch (temp_index.tracking(cg).short) { + .none, + .unreach, + .dead, + .undef, + .immediate, + .eflags, + .register_offset, + .memory, + .load_symbol, + .lea_symbol, + .indirect, + .load_direct, + .lea_direct, + .load_got, + .lea_got, + .load_tlv, + .lea_tlv, + .lea_frame, + .elementwise_regs_then_frame, + .reserved_frame, + .air_ref, + => false, + .register, + .register_pair, + .register_overflow, + => true, + .load_frame => |frame_addr| !frame_addr.index.isNamed(), + }, + }; } - fn tracking(temp: Temp, self: *Self) InstTracking { - return self.inst_tracking.get(temp.index).?; + fn tracking(temp: Temp, cg: *CodeGen) InstTracking { + return cg.inst_tracking.get(temp.index).?; } - fn getOffset(temp: Temp, off: i32, self: *Self) !Temp { - const new_temp_index = self.next_temp_index; - self.temp_type[@intFromEnum(new_temp_index)] = Type.usize; - self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); - switch (temp.tracking(self).short) { + fn getOffset(temp: Temp, off: i32, cg: *CodeGen) !Temp { + const new_temp_index = cg.next_temp_index; + cg.temp_type[@intFromEnum(new_temp_index)] = Type.usize; + cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + switch (temp.tracking(cg).short) { else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), .register => |reg| { const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg.to64() }, .mod = .{ .rm = .{ .size = .qword, @@ -20230,9 +20345,9 @@ const Temp = struct { }, .register_offset => |reg_off| { const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg_off.reg.to64() }, .mod = .{ .rm = .{ .size = .qword, @@ -20240,18 +20355,18 @@ const Temp = struct { } }, }); }, - .lea_symbol => |sym_off| new_temp_index.tracking(self).* = .init(.{ .lea_symbol = .{ + .lea_symbol => |sym_off| new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = .{ .sym_index = sym_off.sym_index, .off = sym_off.off + off, } }), .load_frame => |frame_addr| { const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = .init(.{ .register_offset = .{ + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(cg).* = .init(.{ .register_offset = .{ .reg = new_reg, .off = off, } }); - try self.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ + try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, 
@@ -20259,7 +20374,7 @@ const Temp = struct { } }, }); }, - .lea_frame => |frame_addr| new_temp_index.tracking(self).* = .init(.{ .lea_frame = .{ + .lea_frame => |frame_addr| new_temp_index.tracking(cg).* = .init(.{ .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off, } }), @@ -20267,16 +20382,16 @@ const Temp = struct { return .{ .index = new_temp_index.toIndex() }; } - fn toOffset(temp: *Temp, off: i32, self: *Self) !void { + fn toOffset(temp: *Temp, off: i32, cg: *CodeGen) !void { if (off == 0) return; - switch (temp.unwrap(self)) { + switch (temp.unwrap(cg)) { .ref => {}, .temp => |temp_index| { - const temp_tracking = temp_index.tracking(self); + const temp_tracking = temp_index.tracking(cg); switch (temp_tracking.short) { else => {}, .register => |reg| { - try self.freeValue(temp_tracking.long); + try cg.freeValue(temp_tracking.long); temp_tracking.* = .init(.{ .register_offset = .{ .reg = reg, .off = off, @@ -20284,7 +20399,7 @@ const Temp = struct { return; }, .register_offset => |reg_off| { - try self.freeValue(temp_tracking.long); + try cg.freeValue(temp_tracking.long); temp_tracking.* = .init(.{ .register_offset = .{ .reg = reg_off.reg, .off = reg_off.off + off, @@ -20310,39 +20425,39 @@ const Temp = struct { } }, } - const new_temp = try temp.getOffset(off, self); - try temp.die(self); + const new_temp = try temp.getOffset(off, cg); + try temp.die(cg); temp.* = new_temp; } - fn getLimb(temp: Temp, limb_index: u28, self: *Self) !Temp { - const new_temp_index = self.next_temp_index; - self.temp_type[@intFromEnum(new_temp_index)] = Type.usize; - switch (temp.tracking(self).short) { + fn getLimb(temp: Temp, limb_index: u28, cg: *CodeGen) !Temp { + const new_temp_index = cg.next_temp_index; + cg.temp_type[@intFromEnum(new_temp_index)] = Type.usize; + switch (temp.tracking(cg).short) { else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), .immediate => |imm| { assert(limb_index == 0); - new_temp_index.tracking(self).* = .init(.{ .immediate = imm }); + new_temp_index.tracking(cg).* = .init(.{ .immediate = imm }); }, .register => |reg| { assert(limb_index == 0); const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try self.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), reg.to64()); + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), reg.to64()); }, .register_pair => |regs| { const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try self.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), regs[limb_index].to64()); + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try cg.asmRegisterRegister(.{ ._, .mov }, new_reg.to64(), regs[limb_index].to64()); }, .register_offset => |reg_off| { assert(limb_index == 0); const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try self.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(cg).* = 
.init(.{ .register = new_reg }); + try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg_off.reg.to64() }, .mod = .{ .rm = .{ .size = .qword, @@ -20352,9 +20467,9 @@ const Temp = struct { }, .load_symbol => |sym_off| { const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try self.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ .base = .{ .reloc = sym_off.sym_index }, .mod = .{ .rm = .{ .size = .qword, @@ -20364,13 +20479,13 @@ const Temp = struct { }, .lea_symbol => |sym_off| { assert(limb_index == 0); - new_temp_index.tracking(self).* = .init(.{ .lea_symbol = sym_off }); + new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = sym_off }); }, .load_frame => |frame_addr| { const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try self.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try cg.asmRegisterMemory(.{ ._, .mov }, new_reg.to64(), .{ .base = .{ .frame = frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, @@ -20380,23 +20495,23 @@ const Temp = struct { }, .lea_frame => |frame_addr| { assert(limb_index == 0); - new_temp_index.tracking(self).* = .init(.{ .lea_frame = frame_addr }); + new_temp_index.tracking(cg).* = .init(.{ .lea_frame = frame_addr }); }, } - self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); return .{ .index = new_temp_index.toIndex() }; } - fn toLimb(temp: *Temp, limb_index: u28, self: *Self) !void { - switch (temp.unwrap(self)) { + fn toLimb(temp: *Temp, limb_index: u28, cg: *CodeGen) !void { + switch (temp.unwrap(cg)) { .ref => {}, .temp => |temp_index| { - const temp_tracking = temp_index.tracking(self); + const temp_tracking = temp_index.tracking(cg); switch (temp_tracking.short) { else => {}, .register, .lea_symbol, .lea_frame => { assert(limb_index == 0); - self.temp_type[@intFromEnum(temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(temp_index)] = Type.usize; return; }, .register_pair => |regs| { @@ -20406,9 +20521,9 @@ const Temp = struct { temp_tracking.long.address().offset(@as(u31, limb_index) * 8).deref(), } for (regs, 0..) 
|reg, reg_index| if (reg_index != limb_index) - self.register_manager.freeReg(reg); + cg.register_manager.freeReg(reg); temp_tracking.* = .init(.{ .register = regs[limb_index] }); - self.temp_type[@intFromEnum(temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(temp_index)] = Type.usize; return; }, .load_symbol => |sym_off| { @@ -20417,7 +20532,7 @@ const Temp = struct { .sym_index = sym_off.sym_index, .off = sym_off.off + @as(u31, limb_index) * 8, } }); - self.temp_type[@intFromEnum(temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(temp_index)] = Type.usize; return; }, .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) { @@ -20426,102 +20541,102 @@ const Temp = struct { .index = frame_addr.index, .off = frame_addr.off + @as(u31, limb_index) * 8, } }); - self.temp_type[@intFromEnum(temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(temp_index)] = Type.usize; return; }, } }, } - const new_temp = try temp.getLimb(limb_index, self); - try temp.die(self); + const new_temp = try temp.getLimb(limb_index, cg); + try temp.die(cg); temp.* = new_temp; } - fn toReg(temp: *Temp, new_reg: Register, self: *Self) !bool { - const val, const ty = switch (temp.unwrap(self)) { - .ref => |ref| .{ temp.tracking(self).short, self.typeOf(ref) }, - .temp => |temp_index| val: { - const temp_tracking = temp_index.tracking(self); + fn toReg(temp: *Temp, new_reg: Register, cg: *CodeGen) !bool { + const val, const ty = val_ty: switch (temp.unwrap(cg)) { + .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) }, + .temp => |temp_index| { + const temp_tracking = temp_index.tracking(cg); if (temp_tracking.short == .register and temp_tracking.short.register == new_reg) return false; - break :val .{ temp_tracking.short, temp_index.typeOf(self) }; + break :val_ty .{ temp_tracking.short, temp_index.typeOf(cg) }; }, }; - const new_temp_index = self.next_temp_index; - self.temp_type[@intFromEnum(new_temp_index)] = ty; - try self.genSetReg(new_reg, ty, val, .{}); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try temp.die(self); - self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + const new_temp_index = cg.next_temp_index; + cg.temp_type[@intFromEnum(new_temp_index)] = ty; + try cg.genSetReg(new_reg, ty, val, .{}); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try temp.die(cg); + cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); temp.* = .{ .index = new_temp_index.toIndex() }; return true; } - fn toAnyReg(temp: *Temp, self: *Self) !bool { - const val, const ty = switch (temp.unwrap(self)) { - .ref => |ref| .{ temp.tracking(self).short, self.typeOf(ref) }, + fn toAnyReg(temp: *Temp, cg: *CodeGen) !bool { + const val, const ty = switch (temp.unwrap(cg)) { + .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) }, .temp => |temp_index| val: { - const temp_tracking = temp_index.tracking(self); + const temp_tracking = temp_index.tracking(cg); if (temp_tracking.short == .register) return false; - break :val .{ temp_tracking.short, temp_index.typeOf(self) }; + break :val .{ temp_tracking.short, temp_index.typeOf(cg) }; }, }; - const new_temp_index = self.next_temp_index; - self.temp_type[@intFromEnum(new_temp_index)] = ty; + const new_temp_index = cg.next_temp_index; + cg.temp_type[@intFromEnum(new_temp_index)] = ty; const new_reg = - try self.register_manager.allocReg(new_temp_index.toIndex(), self.regSetForType(ty)); - try self.genSetReg(new_reg, ty, val, .{}); - new_temp_index.tracking(self).* = .init(.{ .register = 
new_reg }); - try temp.die(self); - self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + try cg.register_manager.allocReg(new_temp_index.toIndex(), cg.regSetForType(ty)); + try cg.genSetReg(new_reg, ty, val, .{}); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try temp.die(cg); + cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); temp.* = .{ .index = new_temp_index.toIndex() }; return true; } - fn toRegClass(temp: *Temp, rc: Register.Class, self: *Self) !bool { - const val, const ty = switch (temp.unwrap(self)) { - .ref => |ref| .{ temp.tracking(self).short, self.typeOf(ref) }, + fn toRegClass(temp: *Temp, rc: Register.Class, cg: *CodeGen) !bool { + const val, const ty = switch (temp.unwrap(cg)) { + .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) }, .temp => |temp_index| val: { - const temp_tracking = temp_index.tracking(self); + const temp_tracking = temp_index.tracking(cg); switch (temp_tracking.short) { else => {}, .register => |reg| if (reg.class() == rc) return false, } - break :val .{ temp_tracking.short, temp_index.typeOf(self) }; + break :val .{ temp_tracking.short, temp_index.typeOf(cg) }; }, }; - const new_temp_index = self.next_temp_index; - self.temp_type[@intFromEnum(new_temp_index)] = ty; - const new_reg = try self.register_manager.allocReg(new_temp_index.toIndex(), regSetForRegClass(rc)); - try self.genSetReg(new_reg, ty, val, .{}); - new_temp_index.tracking(self).* = .init(.{ .register = new_reg }); - try temp.die(self); - self.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + const new_temp_index = cg.next_temp_index; + cg.temp_type[@intFromEnum(new_temp_index)] = ty; + const new_reg = try cg.register_manager.allocReg(new_temp_index.toIndex(), regSetForRegClass(rc)); + try cg.genSetReg(new_reg, ty, val, .{}); + new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); + try temp.die(cg); + cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); temp.* = .{ .index = new_temp_index.toIndex() }; return true; } - fn toPair(first_temp: *Temp, second_temp: *Temp, self: *Self) !void { + fn toPair(first_temp: *Temp, second_temp: *Temp, cg: *CodeGen) !void { while (true) for ([_]*Temp{ first_temp, second_temp }) |part_temp| { - if (try part_temp.toAnyReg(self)) break; + if (try part_temp.toAnyReg(cg)) break; } else break; - const first_temp_tracking = first_temp.unwrap(self).temp.tracking(self); - const second_temp_tracking = second_temp.unwrap(self).temp.tracking(self); + const first_temp_tracking = first_temp.unwrap(cg).temp.tracking(cg); + const second_temp_tracking = second_temp.unwrap(cg).temp.tracking(cg); const result: MCValue = .{ .register_pair = .{ first_temp_tracking.short.register, second_temp_tracking.short.register, } }; - const result_temp_index = self.next_temp_index; + const result_temp_index = cg.next_temp_index; const result_temp: Temp = .{ .index = result_temp_index.toIndex() }; - assert(self.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking)); - assert(self.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking)); - self.temp_type[@intFromEnum(result_temp_index)] = Type.slice_const_u8; - result_temp_index.tracking(self).* = .init(result); + assert(cg.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking)); + assert(cg.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking)); + cg.temp_type[@intFromEnum(result_temp_index)] = Type.slice_const_u8; + result_temp_index.tracking(cg).* = .init(result); 
first_temp.* = result_temp; } - fn toLea(temp: *Temp, self: *Self) !bool { - switch (temp.tracking(self).short) { + fn toLea(temp: *Temp, cg: *CodeGen) !bool { + switch (temp.tracking(cg).short) { .none, .unreach, .dead, @@ -20548,65 +20663,83 @@ const Temp = struct { .load_got, .load_tlv, .load_frame, - => return temp.toAnyReg(self), + => return temp.toAnyReg(cg), .lea_symbol => |sym_off| { const off = sym_off.off; if (off == 0) return false; - try temp.toOffset(-off, self); - while (try temp.toAnyReg(self)) {} - try temp.toOffset(off, self); + try temp.toOffset(-off, cg); + while (try temp.toAnyReg(cg)) {} + try temp.toOffset(off, cg); return true; }, } } - fn load(ptr: *Temp, val_ty: Type, self: *Self) !Temp { - const val_abi_size: u32 = @intCast(val_ty.abiSize(self.pt.zcu)); - const val = try self.tempAlloc(val_ty); - switch (val.tracking(self).short) { + fn toBase(temp: *Temp, cg: *CodeGen) !bool { + const temp_tracking = temp.tracking(cg); + switch (temp_tracking.short) { + else => {}, + .indirect, .load_frame => return false, + } + const new_temp_index = cg.next_temp_index; + cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg); + const new_reg = + try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); + try cg.genSetReg(new_reg, Type.usize, temp_tracking.short.address(), .{}); + new_temp_index.tracking(cg).* = .init(.{ .indirect = .{ .reg = new_reg } }); + try temp.die(cg); + cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + temp.* = .{ .index = new_temp_index.toIndex() }; + return true; + } + + fn load(ptr: *Temp, val_ty: Type, cg: *CodeGen) !Temp { + const val_abi_size: u32 = @intCast(val_ty.abiSize(cg.pt.zcu)); + const val = try cg.tempAlloc(val_ty); + switch (val.tracking(cg).short) { else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), .register => |val_reg| { - while (try ptr.toLea(self)) {} + while (try ptr.toLea(cg)) {} switch (val_reg.class()) { - .general_purpose => try self.asmRegisterMemory( + .general_purpose => try cg.asmRegisterMemory( .{ ._, .mov }, registerAlias(val_reg, val_abi_size), - try ptr.tracking(self).short.deref().mem(self, self.memSize(val_ty)), + try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }), ), else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), } }, .load_frame => |val_frame_addr| { - var val_ptr = try self.tempFromValue(Type.usize, .{ .lea_frame = val_frame_addr }); - var len = try self.tempFromValue(Type.usize, .{ .immediate = val_abi_size }); - try val_ptr.memcpy(ptr, &len, self); - try val_ptr.die(self); - try len.die(self); + var val_ptr = try cg.tempFromValue(Type.usize, .{ .lea_frame = val_frame_addr }); + var len = try cg.tempFromValue(Type.usize, .{ .immediate = val_abi_size }); + try val_ptr.memcpy(ptr, &len, cg); + try val_ptr.die(cg); + try len.die(cg); }, } return val; } - fn store(ptr: *Temp, val: *Temp, self: *Self) !void { - const val_ty = val.typeOf(self); - const val_abi_size: u32 = @intCast(val_ty.abiSize(self.pt.zcu)); - val: switch (val.tracking(self).short) { + fn store(ptr: *Temp, val: *Temp, cg: *CodeGen) !void { + const val_ty = val.typeOf(cg); + const val_abi_size: u32 = @intCast(val_ty.abiSize(cg.pt.zcu)); + val: switch (val.tracking(cg).short) { else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), .immediate => |imm| if (std.math.cast(i32, imm)) |s| { - while (try ptr.toLea(self)) {} - try self.asmMemoryImmediate( + while (try ptr.toLea(cg)) {} + try cg.asmMemoryImmediate( .{ ._, 
.mov }, - try ptr.tracking(self).short.deref().mem(self, self.memSize(val_ty)), + try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }), .s(s), ); } else continue :val .{ .register = undefined }, .register => { - while (try ptr.toLea(self) or try val.toAnyReg(self)) {} - const val_reg = val.tracking(self).short.register; + while (try ptr.toLea(cg) or try val.toAnyReg(cg)) {} + const val_reg = val.tracking(cg).short.register; switch (val_reg.class()) { - .general_purpose => try self.asmMemoryRegister( + .general_purpose => try cg.asmMemoryRegister( .{ ._, .mov }, - try ptr.tracking(self).short.deref().mem(self, self.memSize(val_ty)), + try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }), registerAlias(val_reg, val_abi_size), ), else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), @@ -20615,64 +20748,64 @@ const Temp = struct { } } - fn memcpy(dst: *Temp, src: *Temp, len: *Temp, self: *Self) !void { + fn memcpy(dst: *Temp, src: *Temp, len: *Temp, cg: *CodeGen) !void { while (true) for ([_]*Temp{ dst, src, len }, [_]Register{ .rdi, .rsi, .rcx }) |temp, reg| { - if (try temp.toReg(reg, self)) break; + if (try temp.toReg(reg, cg)) break; } else break; - try self.asmOpOnly(.{ .@"rep _sb", .mov }); + try cg.asmOpOnly(.{ .@"rep _sb", .mov }); } // i, m, r - fn add(lhs: *Temp, rhs: *Temp, self: *Self) !Temp { - const res_index = self.next_temp_index; + fn add(lhs: *Temp, rhs: *Temp, cg: *CodeGen) !Temp { + const res_index = cg.next_temp_index; var res: Temp = .{ .index = res_index.toIndex() }; - try self.select(&.{ &res, lhs, rhs }, .{ ._, .add }, &.{ + try cg.select(&.{ &res, lhs, rhs }, .{ ._, .add }, &.{ .{ .ops = &.{ .{ .match = 1 }, .r, .i } }, .{ .ops = &.{ .{ .match = 1 }, .m, .i } }, .{ .ops = &.{ .{ .match = 1 }, .r, .m } }, .{ .ops = &.{ .{ .match = 1 }, .m, .r } }, .{ .ops = &.{ .{ .match = 1 }, .r, .r } }, }); - self.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1); - self.temp_type[@intFromEnum(res_index)] = lhs.typeOf(self); + cg.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1); + cg.temp_type[@intFromEnum(res_index)] = lhs.typeOf(cg); return res; } - fn mul(lhs: *Temp, rhs: *Temp, self: *Self) !Temp { - const res_index = self.next_temp_index; - var res: Temp = .{ .index = self.next_temp_index.toIndex() }; - try self.select(&.{ &res, lhs, rhs }, .{ .i_, .mul }, &.{ + fn mul(lhs: *Temp, rhs: *Temp, cg: *CodeGen) !Temp { + const res_index = cg.next_temp_index; + var res: Temp = .{ .index = cg.next_temp_index.toIndex() }; + try cg.select(&.{ &res, lhs, rhs }, .{ .i_, .mul }, &.{ .{ .ops = &.{ .r, .m, .i } }, .{ .ops = &.{ .r, .r, .i } }, .{ .ops = &.{ .{ .match = 1 }, .r, .m } }, .{ .ops = &.{ .{ .match = 1 }, .r, .r } }, }); - self.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1); - self.temp_type[@intFromEnum(res_index)] = lhs.typeOf(self); + cg.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1); + cg.temp_type[@intFromEnum(res_index)] = lhs.typeOf(cg); return res; } - fn moveTo(temp: Temp, inst: Air.Inst.Index, self: *Self) !void { - if (self.liveness.isUnused(inst)) try temp.die(self) else switch (temp.unwrap(self)) { + fn moveTo(temp: Temp, inst: Air.Inst.Index, cg: *CodeGen) !void { + if (cg.liveness.isUnused(inst)) try temp.die(cg) else switch (temp.unwrap(cg)) { .ref => { - const result = try self.allocRegOrMem(inst, true); - try self.genCopy(self.typeOfIndex(inst), result, temp.tracking(self).short, .{}); + const result = try cg.allocRegOrMem(inst, true); + try 
cg.genCopy(cg.typeOfIndex(inst), result, temp.tracking(cg).short, .{}); tracking_log.debug("{} => {} (birth)", .{ inst, result }); - self.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result)); + cg.inst_tracking.putAssumeCapacityNoClobber(inst, .init(result)); }, .temp => |temp_index| { - const temp_tracking = temp_index.tracking(self); + const temp_tracking = temp_index.tracking(cg); tracking_log.debug("{} => {} (birth)", .{ inst, temp_tracking.short }); - self.inst_tracking.putAssumeCapacityNoClobber(inst, temp_tracking.*); - assert(self.reuseTemp(inst, temp_index.toIndex(), temp_tracking)); + cg.inst_tracking.putAssumeCapacityNoClobber(inst, temp_tracking.*); + assert(cg.reuseTemp(inst, temp_index.toIndex(), temp_tracking)); }, } } - fn die(temp: Temp, self: *Self) !void { - switch (temp.unwrap(self)) { + fn die(temp: Temp, cg: *CodeGen) !void { + switch (temp.unwrap(cg)) { .ref => {}, - .temp => |temp_index| try temp_index.tracking(self).die(self, temp_index.toIndex()), + .temp => |temp_index| try temp_index.tracking(cg).die(cg, temp_index.toIndex()), } } @@ -20687,17 +20820,17 @@ const Temp = struct { return @enumFromInt(index.toTargetIndex()); } - fn tracking(index: Index, self: *Self) *InstTracking { - return &self.inst_tracking.values()[@intFromEnum(index)]; + fn tracking(index: Index, cg: *CodeGen) *InstTracking { + return &cg.inst_tracking.values()[@intFromEnum(index)]; } - fn isValid(index: Index, self: *Self) bool { - return index.tracking(self).short != .dead; + fn isValid(index: Index, cg: *CodeGen) bool { + return index.tracking(cg).short != .dead; } - fn typeOf(index: Index, self: *Self) Type { - assert(index.isValid(self)); - return self.temp_type[@intFromEnum(index)]; + fn typeOf(index: Index, cg: *CodeGen) Type { + assert(index.isValid(cg)); + return cg.temp_type[@intFromEnum(index)]; } const max = std.math.maxInt(@typeInfo(Index).@"enum".tag_type); @@ -20723,17 +20856,17 @@ const Temp = struct { }; }; -fn resetTemps(self: *Self) void { - for (0..@intFromEnum(self.next_temp_index)) |temp_index| { +fn resetTemps(cg: *CodeGen) void { + for (0..@intFromEnum(cg.next_temp_index)) |temp_index| { const temp: Temp.Index = @enumFromInt(temp_index); - assert(!temp.isValid(self)); - self.temp_type[temp_index] = undefined; + assert(!temp.isValid(cg)); + cg.temp_type[temp_index] = undefined; } - self.next_temp_index = @enumFromInt(0); + cg.next_temp_index = @enumFromInt(0); } fn reuseTemp( - self: *Self, + cg: *CodeGen, new_inst: Air.Inst.Index, old_inst: Air.Inst.Index, tracking: *InstTracking, @@ -20743,79 +20876,80 @@ fn reuseTemp( .register_pair, .register_offset, .register_overflow, + .indirect, => for (tracking.short.getRegs()) |tracked_reg| { if (RegisterManager.indexOfRegIntoTracked(tracked_reg)) |tracked_index| { - self.register_manager.registers[tracked_index] = new_inst; + cg.register_manager.registers[tracked_index] = new_inst; } }, .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false, else => {}, } switch (tracking.short) { - .eflags, .register_overflow => self.eflags_inst = new_inst, + .eflags, .register_overflow => cg.eflags_inst = new_inst, else => {}, } - tracking.reuse(self, new_inst, old_inst); + tracking.reuse(cg, new_inst, old_inst); return true; } -fn tempAlloc(self: *Self, ty: Type) !Temp { - const temp_index = self.next_temp_index; - temp_index.tracking(self).* = .init( - try self.allocRegOrMemAdvanced(ty, temp_index.toIndex(), true), +fn tempAlloc(cg: *CodeGen, ty: Type) !Temp { + const temp_index = cg.next_temp_index; + 
temp_index.tracking(cg).* = .init( + try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), true), ); - self.temp_type[@intFromEnum(temp_index)] = ty; - self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); + cg.temp_type[@intFromEnum(temp_index)] = ty; + cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); return .{ .index = temp_index.toIndex() }; } -fn tempAllocReg(self: *Self, ty: Type, rc: RegisterManager.RegisterBitSet) !Temp { - const temp_index = self.next_temp_index; - temp_index.tracking(self).* = .init( - .{ .register = try self.register_manager.allocReg(temp_index.toIndex(), rc) }, +fn tempAllocReg(cg: *CodeGen, ty: Type, rc: RegisterManager.RegisterBitSet) !Temp { + const temp_index = cg.next_temp_index; + temp_index.tracking(cg).* = .init( + .{ .register = try cg.register_manager.allocReg(temp_index.toIndex(), rc) }, ); - self.temp_type[@intFromEnum(temp_index)] = ty; - self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); + cg.temp_type[@intFromEnum(temp_index)] = ty; + cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); return .{ .index = temp_index.toIndex() }; } -fn tempFromValue(self: *Self, ty: Type, value: MCValue) !Temp { - const temp_index = self.next_temp_index; - temp_index.tracking(self).* = .init(value); - self.temp_type[@intFromEnum(temp_index)] = ty; - try self.getValue(value, temp_index.toIndex()); - self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); +fn tempFromValue(cg: *CodeGen, ty: Type, value: MCValue) !Temp { + const temp_index = cg.next_temp_index; + temp_index.tracking(cg).* = .init(value); + cg.temp_type[@intFromEnum(temp_index)] = ty; + try cg.getValue(value, temp_index.toIndex()); + cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); return .{ .index = temp_index.toIndex() }; } fn tempFromOperand( - self: *Self, + cg: *CodeGen, inst: Air.Inst.Index, op_index: Liveness.OperandInt, op_ref: Air.Inst.Ref, ) !Temp { - const zcu = self.pt.zcu; + const zcu = cg.pt.zcu; const ip = &zcu.intern_pool; - if (!self.liveness.operandDies(inst, op_index)) { + if (!cg.liveness.operandDies(inst, op_index)) { if (op_ref.toIndex()) |op_inst| return .{ .index = op_inst }; const val = op_ref.toInterned().?; - const gop = try self.const_tracking.getOrPut(self.gpa, val); + const gop = try cg.const_tracking.getOrPut(cg.gpa, val); if (!gop.found_existing) gop.value_ptr.* = .init(init: { - const const_mcv = try self.genTypedValue(.fromInterned(val)); + const const_mcv = try cg.genTypedValue(.fromInterned(val)); switch (const_mcv) { - .lea_tlv => |tlv_sym| switch (self.bin_file.tag) { + .lea_tlv => |tlv_sym| switch (cg.bin_file.tag) { .elf, .macho => { - if (self.mod.pic) { - try self.spillRegisters(&.{ .rdi, .rax }); + if (cg.mod.pic) { + try cg.spillRegisters(&.{ .rdi, .rax }); } else { - try self.spillRegisters(&.{.rax}); + try cg.spillRegisters(&.{.rax}); } - const frame_index = try self.allocFrameIndex(.init(.{ + const frame_index = try cg.allocFrameIndex(.init(.{ .size = 8, .alignment = .@"8", })); - try self.genSetMem( + try cg.genSetMem( .{ .frame = frame_index }, 0, Type.usize, @@ -20829,24 +20963,24 @@ fn tempFromOperand( else => break :init const_mcv, } }); - return self.tempFromValue(.fromInterned(ip.typeOf(val)), gop.value_ptr.short); + return cg.tempFromValue(.fromInterned(ip.typeOf(val)), gop.value_ptr.short); } - const temp_index = self.next_temp_index; + const temp_index = cg.next_temp_index; const temp: Temp = .{ .index = temp_index.toIndex() }; const op_inst = op_ref.toIndex().?; 
-    const tracking = self.getResolvedInstValue(op_inst);
-    temp_index.tracking(self).* = tracking.*;
-    if (!self.reuseTemp(temp.index, op_inst, tracking)) return .{ .index = op_ref.toIndex().? };
-    self.temp_type[@intFromEnum(temp_index)] = self.typeOf(op_ref);
-    self.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
+    const tracking = cg.getResolvedInstValue(op_inst);
+    temp_index.tracking(cg).* = tracking.*;
+    if (!cg.reuseTemp(temp.index, op_inst, tracking)) return .{ .index = op_ref.toIndex().? };
+    cg.temp_type[@intFromEnum(temp_index)] = cg.typeOf(op_ref);
+    cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1);
     return temp;
 }
 
-inline fn tempsFromOperands(self: *Self, inst: Air.Inst.Index, op_refs: anytype) ![op_refs.len]Temp {
+inline fn tempsFromOperands(cg: *CodeGen, inst: Air.Inst.Index, op_refs: anytype) ![op_refs.len]Temp {
     var temps: [op_refs.len]Temp = undefined;
     inline for (&temps, 0.., op_refs) |*temp, op_index, op_ref| {
-        temp.* = try self.tempFromOperand(inst, op_index, op_ref);
+        temp.* = try cg.tempFromOperand(inst, op_index, op_ref);
     }
     return temps;
 }
@@ -20859,15 +20993,55 @@ const Operand = union(enum) {
     inst: Mir.Inst.Index,
 };
 
+const SelectLoop = struct {
+    element_reloc: Mir.Inst.Index,
+    element_offset: union(enum) {
+        unused,
+        known: u31,
+        temp: Temp,
+    },
+    element_size: ?u13,
+    limb_reloc: Mir.Inst.Index,
+    limb_offset: union(enum) {
+        unused,
+        known: u31,
+        temp: Temp,
+    },
+    limb_size: ?u8,
+    remaining_size: ?u64,
+};
+
 const Pattern = struct {
-    tag: Mir.Inst.FixedTag,
     ops: []const Op,
     commute: struct { u8, u8 } = .{ 0, 0 },
-    features: []const std.Target.x86.Feature = &.{},
+
+    const Set = struct {
+        required_features: []const std.Target.x86.Feature = &.{},
+        loop: enum {
+            /// only execute the instruction once
+            once,
+            /// execute the instruction on all groups of non-overlapping bits in the entire value
+            bitwise,
+            /// for each element, execute the instruction on each limb, propagating the carry flag
+            limbwise_carry,
+            /// for each element, execute the instruction on pairs of limbs, starting from the
+            /// least significant, propagating a limb
+            limbwise_pairs_forward,
+            /// for each element, execute the instruction on pairs of limbs, starting from the
+            /// most significant, propagating a limb
+            limbwise_pairs_reverse,
+            /// for each element, execute the instruction
+            elementwise,
+        } = .once,
+        mir_tag: Mir.Inst.FixedTag,
+        patterns: []const Pattern,
+    };
 
     const Op = union(enum) {
-        /// match another operand
-        match: u8,
+        /// reuse another operand
+        implicit: u8,
+        /// repeat another operand
+        explicit: u8,
         /// any general purpose register
         gpr,
         /// any 64-bit mmx register
@@ -20878,165 +21052,449 @@ const Pattern = struct {
         ymm,
         /// any memory
         mem,
+        /// a limb stored in a gpr
+        gpr_limb,
+        /// a limb stored in a 64-bit mmx register
+        mm_limb,
+        /// a limb stored in a 128-bit sse register
+        xmm_limb,
+        /// a limb stored in a 256-bit sse register
+        ymm_limb,
+        /// a limb stored in memory
+        mem_limb,
         /// specific immediate
         imm: i8,
         /// any immediate signed extended from 32 bits
         simm32,
 
-        fn matches(op: Op, is_mut: bool, temp: Temp, self: *Self) bool {
+        fn matches(op: Op, is_mut: bool, temp: Temp, cg: *CodeGen) bool {
+            const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu);
             return switch (op) {
-                .match => unreachable,
-                .gpr => switch (temp.tracking(self).short) {
+                .implicit, .explicit => unreachable,
+                .gpr => abi_size <= 8 and switch (temp.tracking(cg).short) {
                     .register => |reg| reg.class() == .general_purpose,
.register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0, - else => self.regClassForType(temp.typeOf(self)) == .general_purpose, + else => cg.regClassForType(temp.typeOf(cg)) == .general_purpose, }, - .mm => switch (temp.tracking(self).short) { + .mm => abi_size <= 8 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .mmx, .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0, - else => self.regClassForType(temp.typeOf(self)) == .mmx, + else => cg.regClassForType(temp.typeOf(cg)) == .mmx, }, - .xmm => switch (temp.tracking(self).short) { + .xmm => abi_size > 8 and abi_size <= 16 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .sse, .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, - else => self.regClassForType(temp.typeOf(self)) == .sse, + else => cg.regClassForType(temp.typeOf(cg)) == .sse, }, - .ymm => switch (temp.tracking(self).short) { + .ymm => abi_size > 16 and abi_size <= 32 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .sse, .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, - else => self.regClassForType(temp.typeOf(self)) == .sse, - } and temp.typeOf(self).abiSize(self.pt.zcu) > 16, - .mem => (!is_mut or temp.isMut(self)) and temp.tracking(self).short.isMemory(), - .imm => |specific_imm| if (is_mut) unreachable else switch (temp.tracking(self).short) { + else => cg.regClassForType(temp.typeOf(cg)) == .sse, + }, + .mem, .mem_limb => (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), + .gpr_limb => abi_size > 8, + .mm_limb => abi_size > 8 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory() and cg.regClassForType(temp.typeOf(cg)) == .mmx, + .xmm_limb => abi_size > 16 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), + .ymm_limb => abi_size > 32 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), + .imm => |specific_imm| if (is_mut) unreachable else switch (temp.tracking(cg).short) { .immediate => |imm| @as(i64, @bitCast(imm)) == specific_imm, else => false, }, - .simm32 => if (is_mut) unreachable else switch (temp.tracking(self).short) { - .immediate => |imm| temp.typeOf(self).abiSize(self.pt.zcu) <= 4 or - std.math.cast(i32, @as(i64, @bitCast(imm))) != null, + .simm32 => if (is_mut) unreachable else switch (temp.tracking(cg).short) { + .immediate => |imm| abi_size <= 4 or std.math.cast(i32, @as(i64, @bitCast(imm))) != null, else => false, }, }; } }; }; -fn select(self: *Self, dst_temps: []Temp, src_temps: []const *Temp, patterns: []const Pattern) !void { - patterns: for (patterns) |pattern| { - for (pattern.features) |feature| if (!self.hasFeature(feature)) continue :patterns; - for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| if (!switch (src_op) { - .match => |match_index| pattern.ops[match_index], - else => src_op, - }.matches(src_op == .match, src_temp.*, self)) continue :patterns; - while (true) for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { - if (switch (switch (src_op) { - .match => |match_index| pattern.ops[match_index], - else => src_op, - }) { - .match => unreachable, - .gpr => try src_temp.toRegClass(.general_purpose, self), - .mm => try src_temp.toRegClass(.mmx, self), - .xmm, .ymm => try src_temp.toRegClass(.sse, self), - .mem, .imm, .simm32 => false, - }) break; - } else break; - var mir_ops: [4]Operand = @splat(.none); - var mir_ops_len = dst_temps.len; - for 
(src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { - const mir_op, const matched_src_op = op: switch (src_op) { - .match => |match_index| { - dst_temps[match_index] = src_temp.*; - break :op .{ &mir_ops[match_index], pattern.ops[match_index] }; +fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_sets: []const Pattern.Set) !void { + var loop: SelectLoop = .{ + .element_reloc = undefined, + .element_offset = .unused, + .element_size = null, + .limb_reloc = undefined, + .limb_offset = .unused, + .limb_size = null, + .remaining_size = null, + }; + var extra_temps: [4]?Temp = @splat(null); + pattern_sets: for (pattern_sets) |pattern_set| { + for (pattern_set.required_features) |required_feature| if (!cg.hasFeature(required_feature)) continue :pattern_sets; + patterns: for (pattern_set.patterns) |pattern| { + for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { + const ref_src_op, const is_mut = switch (src_op) { + .implicit, .explicit => |op_index| .{ pattern.ops[op_index], true }, + else => .{ src_op, false }, + }; + if (!ref_src_op.matches(is_mut, src_temp.*, cg)) continue :patterns; + } + while (true) for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { + if (switch (switch (src_op) { + .implicit, .explicit => |op_index| pattern.ops[op_index], + else => src_op, + }) { + .implicit, .explicit => unreachable, + .gpr => try src_temp.toRegClass(.general_purpose, cg), + .mm => try src_temp.toRegClass(.mmx, cg), + .xmm, .ymm => try src_temp.toRegClass(.sse, cg), + .mem, .imm, .simm32 => false, + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mem_limb => switch (src_temp.tracking(cg).short) { + .register_pair => false, + else => try src_temp.toBase(cg), + }, + }) break; + } else break; + var mir_ops_len = dst_temps.len; + for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { + const ref_src_op, const extra_temp = op: switch (src_op) { + .implicit => |op_index| { + dst_temps[op_index] = if (src_temp.isMut(cg)) + src_temp.* + else + try cg.tempAlloc(src_temp.typeOf(cg)); + break :op .{ pattern.ops[op_index], &extra_temps[op_index] }; + }, + .explicit => |op_index| { + dst_temps[op_index] = if (src_temp.isMut(cg)) + src_temp.* + else + try cg.tempAlloc(src_temp.typeOf(cg)); + defer mir_ops_len += 1; + break :op .{ pattern.ops[op_index], &extra_temps[mir_ops_len] }; + }, + else => { + defer mir_ops_len += 1; + break :op .{ src_op, &extra_temps[mir_ops_len] }; + }, + }; + const limb_size: u8, const rc = switch (ref_src_op) { + else => continue, + .gpr_limb => .{ 8, abi.RegisterClass.gp }, + .mm_limb => .{ 8, @panic("TODO") }, + .xmm_limb => .{ 16, abi.RegisterClass.sse }, + .ymm_limb => .{ 32, abi.RegisterClass.sse }, + }; + assert(loop.limb_size == null or loop.limb_size == limb_size); + loop.limb_size = limb_size; + loop.remaining_size = loop.remaining_size orelse src_temp.typeOf(cg).abiSize(cg.pt.zcu); + switch (src_temp.tracking(cg).short) { + .register_pair => switch (loop.limb_offset) { + .unused, .temp => loop.limb_offset = .{ .known = 0 }, + .known => {}, + }, + else => { + switch (loop.limb_offset) { + .unused => loop.limb_offset = .{ .temp = undefined }, + .known, .temp => {}, + } + extra_temp.* = try cg.tempAllocReg(Type.usize, rc); + }, + } + } + switch (loop.element_offset) { + .unused, .known => {}, + .temp => |*element_offset| { + element_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + const element_offset_reg = element_offset.tracking(cg).short.register; + try cg.asmRegisterRegister(.{ ._, .xor 
}, element_offset_reg.to32(), element_offset_reg.to32()); + loop.element_reloc = @intCast(cg.mir_instructions.len); }, - else => { - defer mir_ops_len += 1; - break :op .{ &mir_ops[mir_ops_len], src_op }; - }, - }; - const src_mcv = src_temp.tracking(self).short; - mir_op.* = switch (matched_src_op) { - .match => unreachable, - .gpr => .{ .reg = registerAlias( - src_mcv.register, - @intCast(src_temp.typeOf(self).abiSize(self.pt.zcu)), - ) }, - .mm => .{ .reg = src_mcv.register }, - .xmm => .{ .reg = src_mcv.register.to128() }, - .ymm => .{ .reg = src_mcv.register.to256() }, - .mem => .{ .mem = try src_mcv.mem(self, self.memSize(src_temp.typeOf(self))) }, - .imm => |imm| .{ .imm = .s(imm) }, - .simm32 => switch (src_temp.typeOf(self).abiSize(self.pt.zcu)) { - else => unreachable, - 1 => .{ .imm = if (std.math.cast(i8, @as(i64, @bitCast(src_mcv.immediate)))) |small| - .s(small) - else - .u(@as(u8, @intCast(src_mcv.immediate))) }, - 2 => .{ .imm = if (std.math.cast(i16, @as(i64, @bitCast(src_mcv.immediate)))) |small| - .s(small) - else - .u(@as(u16, @intCast(src_mcv.immediate))) }, - 3...8 => .{ .imm = if (std.math.cast(i32, @as(i64, @bitCast(src_mcv.immediate)))) |small| - .s(small) - else - .u(@as(u32, @intCast(src_mcv.immediate))) }, - }, - }; - } - for ( - pattern.ops[0..dst_temps.len], - dst_temps, - mir_ops[0..dst_temps.len], - ) |dst_op, *dst_temp, *mir_op| { - if (mir_op.* != .none) continue; - const ty = src_temps[0].typeOf(self); - switch (dst_op) { - .match => |match_index| { - dst_temp.* = dst_temps[match_index]; - mir_op.* = mir_ops[match_index]; - }, - .gpr => { - dst_temp.* = try self.tempAllocReg(ty, abi.RegisterClass.gp); - mir_op.* = .{ .reg = registerAlias( - dst_temp.tracking(self).short.register, - @intCast(ty.abiSize(self.pt.zcu)), - ) }; - }, - .mm => @panic("TODO"), - .xmm => { - dst_temp.* = try self.tempAllocReg(ty, abi.RegisterClass.sse); - mir_op.* = .{ .reg = dst_temp.tracking(self).short.register.to128() }; - }, - .ymm => { - dst_temp.* = try self.tempAllocReg(ty, abi.RegisterClass.sse); - mir_op.* = .{ .reg = dst_temp.tracking(self).short.register.to256() }; - }, - .mem => @panic("TODO"), - .imm, .simm32 => unreachable, // unmodifiable destination + } + while (true) { + switch (loop.limb_offset) { + .unused, .known => {}, + .temp => |*limb_offset| { + limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + const limb_offset_reg = limb_offset.tracking(cg).short.register; + try cg.asmRegisterRegister(.{ ._, .xor }, limb_offset_reg.to32(), limb_offset_reg.to32()); + loop.limb_reloc = @intCast(cg.mir_instructions.len); + }, + } + while (true) { + var mir_ops: [4]Operand = @splat(.none); + mir_ops_len = dst_temps.len; + for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { + const mir_op, const ref_src_op, const extra_temp = op: switch (src_op) { + .implicit => |op_index| .{ &mir_ops[op_index], pattern.ops[op_index], &extra_temps[op_index] }, + .explicit => |op_index| { + defer mir_ops_len += 1; + break :op .{ &mir_ops[mir_ops_len], pattern.ops[op_index], &extra_temps[mir_ops_len] }; + }, + else => { + defer mir_ops_len += 1; + break :op .{ &mir_ops[mir_ops_len], src_op, &extra_temps[mir_ops_len] }; + }, + }; + const src_mcv = src_temp.tracking(cg).short; + switch (ref_src_op) { + else => {}, + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (src_mcv) { + .register_pair => {}, + else => try cg.asmRegisterMemory( + switch (ref_src_op) { + else => unreachable, + .gpr_limb => .{ ._, .mov }, + .mm_limb => .{ ._q, .mov }, + .xmm_limb, 
.ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, + }, + registerAlias(extra_temp.*.?.tracking(cg).short.register, loop.limb_size.?), + try src_mcv.mem(cg, switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .disp = limb_offset, + }, + .temp => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .index = limb_offset.tracking(cg).short.register.to64(), + }, + }), + ), + }, + } + mir_op.* = switch (ref_src_op) { + .implicit, .explicit => unreachable, + .gpr => .{ .reg = registerAlias( + src_mcv.register, + @intCast(src_temp.typeOf(cg).abiSize(cg.pt.zcu)), + ) }, + .mm => .{ .reg = src_mcv.register }, + .xmm => .{ .reg = src_mcv.register.to128() }, + .ymm => .{ .reg = src_mcv.register.to256() }, + .mem => .{ .mem = try src_mcv.mem(cg, .{ .size = cg.memSize(src_temp.typeOf(cg)) }) }, + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (src_mcv) { + .register_pair => |src_regs| switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| .{ .reg = registerAlias( + src_regs[@divExact(limb_offset, loop.limb_size.?)], + loop.limb_size.?, + ) }, + .temp => unreachable, + }, + else => .{ .reg = registerAlias( + extra_temp.*.?.tracking(cg).short.register, + loop.limb_size.?, + ) }, + }, + .mem_limb => .{ .mem = switch (src_mcv) { + .register_pair => unreachable, + else => switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| try src_mcv.mem(cg, .{ + .size = .fromSize(loop.limb_size.?), + .disp = limb_offset, + }), + .temp => |limb_offset| try src_mcv.mem(cg, .{ + .size = .fromSize(loop.limb_size.?), + .index = limb_offset.tracking(cg).short.register.to64(), + }), + }, + } }, + .imm => |imm| .{ .imm = .s(imm) }, + .simm32 => switch (src_temp.typeOf(cg).abiSize(cg.pt.zcu)) { + else => unreachable, + 1 => .{ .imm = if (std.math.cast(i8, @as(i64, @bitCast(src_mcv.immediate)))) |small| + .s(small) + else + .u(@as(u8, @intCast(src_mcv.immediate))) }, + 2 => .{ .imm = if (std.math.cast(i16, @as(i64, @bitCast(src_mcv.immediate)))) |small| + .s(small) + else + .u(@as(u16, @intCast(src_mcv.immediate))) }, + 3...8 => .{ .imm = if (std.math.cast(i32, @as(i64, @bitCast(src_mcv.immediate)))) |small| + .s(small) + else + .u(@as(u32, @intCast(src_mcv.immediate))) }, + }, + }; + } + for ( + pattern.ops[0..dst_temps.len], + dst_temps, + mir_ops[0..dst_temps.len], + extra_temps[0..dst_temps.len], + ) |dst_op, *dst_temp, *mir_op, *extra_temp| { + if (mir_op.* != .none) continue; + const ty = src_temps[0].typeOf(cg); + switch (dst_op) { + .implicit => unreachable, + .explicit => |op_index| { + dst_temp.* = dst_temps[op_index]; + mir_op.* = mir_ops[op_index]; + }, + .gpr => { + dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.gp); + mir_op.* = .{ .reg = registerAlias( + dst_temp.tracking(cg).short.register, + @intCast(ty.abiSize(cg.pt.zcu)), + ) }; + }, + .mm => @panic("TODO"), + .xmm => { + dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.sse); + mir_op.* = .{ .reg = dst_temp.tracking(cg).short.register.to128() }; + }, + .ymm => { + dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.sse); + mir_op.* = .{ .reg = dst_temp.tracking(cg).short.register.to256() }; + }, + .mem => @panic("TODO"), + .gpr_limb => { + dst_temp.* = try cg.tempAlloc(ty); + extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to64() }; + }, + .mm_limb => { + dst_temp.* = try cg.tempAlloc(ty); + extra_temp.* = try 
cg.tempAllocReg(Type.usize, @panic("TODO")); + mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register }; + }, + .xmm_limb => { + dst_temp.* = try cg.tempAlloc(ty); + extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.sse); + mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to128() }; + }, + .ymm_limb => { + dst_temp.* = try cg.tempAlloc(ty); + extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.sse); + mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to256() }; + }, + .mem_limb => { + dst_temp.* = try cg.tempAlloc(ty); + mir_op.* = .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .disp = limb_offset, + }, + .temp => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .index = limb_offset.tracking(cg).short.register.to64(), + }, + }) }; + }, + .imm, .simm32 => unreachable, // unmodifiable destination + } + } + std.mem.swap(Operand, &mir_ops[pattern.commute[0]], &mir_ops[pattern.commute[1]]); + cg.asmOps(pattern_set.mir_tag, mir_ops) catch |err| switch (err) { + error.InvalidInstruction => { + const fixes = @tagName(pattern_set.mir_tag[0]); + const fixes_blank = std.mem.indexOfScalar(u8, fixes, '_').?; + return cg.fail( + "invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'", + .{ + fixes[0..fixes_blank], + @tagName(pattern_set.mir_tag[1]), + fixes[fixes_blank + 1 ..], + @tagName(mir_ops[0]), + @tagName(mir_ops[1]), + @tagName(mir_ops[2]), + @tagName(mir_ops[3]), + }, + ); + }, + else => |e| return e, + }; + for ( + extra_temps[0..dst_temps.len], + pattern.ops[0..dst_temps.len], + dst_temps, + ) |maybe_extra_temp, dst_op, dst_temp| if (maybe_extra_temp) |extra_temp| switch (dst_op) { + else => {}, + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (dst_temp.tracking(cg).short) { + .register_pair => |dst_regs| switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| try cg.asmRegisterRegister( + .{ ._, .mov }, + dst_regs[@divExact(limb_offset, loop.limb_size.?)].to64(), + extra_temp.tracking(cg).short.register.to64(), + ), + .temp => unreachable, + }, + else => |dst_mcv| try cg.asmMemoryRegister( + switch (dst_op) { + else => unreachable, + .gpr_limb => .{ ._, .mov }, + .mm_limb => .{ ._q, .mov }, + .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, + }, + try dst_mcv.mem(cg, switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .disp = limb_offset, + }, + .temp => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .index = limb_offset.tracking(cg).short.register.to64(), + }, + }), + registerAlias(extra_temp.tracking(cg).short.register, loop.limb_size.?), + ), + }, + }; + switch (pattern_set.loop) { + .once => break :pattern_sets, + .bitwise => {}, + .limbwise_carry => @panic("TODO"), + .limbwise_pairs_forward => @panic("TODO"), + .limbwise_pairs_reverse => @panic("TODO"), + .elementwise => @panic("TODO"), + } + switch (loop.limb_offset) { + .unused => break, + .known => |*limb_offset| { + limb_offset.* += loop.limb_size.?; + loop.remaining_size.? -= loop.limb_size.?; + if (loop.remaining_size.? < loop.limb_size.? 
or + (loop.element_size != null and limb_offset.* >= loop.element_size.?)) + { + limb_offset.* = 0; + break; + } + }, + .temp => |limb_offset| { + const limb_offset_reg = limb_offset.tracking(cg).short.register; + try cg.asmRegisterMemory(.{ ._, .lea }, limb_offset_reg.to32(), .{ + .base = .{ .reg = limb_offset_reg.to64() }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = loop.limb_size.?, + } }, + }); + try cg.asmRegisterImmediate( + .{ ._, .cmp }, + limb_offset_reg.to32(), + .u(loop.element_size orelse loop.remaining_size.?), + ); + _ = try cg.asmJccReloc(.b, loop.limb_reloc); + try limb_offset.die(cg); + break; + }, + } + } + switch (loop.element_offset) { + .unused => break :pattern_sets, + .known => |*element_offset| { + if (loop.remaining_size.? == 0) break :pattern_sets; + element_offset.* += loop.element_size.?; + }, + .temp => |element_offset| { + if (true) @panic("TODO"); + try element_offset.die(cg); + if (loop.remaining_size.? == 0) break :pattern_sets; + break; + }, + } } } - std.mem.swap(Operand, &mir_ops[pattern.commute[0]], &mir_ops[pattern.commute[1]]); - self.asmOps(pattern.tag, mir_ops) catch |err| switch (err) { - error.InvalidInstruction => { - const fixes = @tagName(pattern.tag[0]); - const fixes_replace = std.mem.indexOfScalar(u8, fixes, '_').?; - return self.fail( - "invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'", - .{ - fixes[0..fixes_replace], - @tagName(pattern.tag[1]), - fixes[fixes_replace + 1 ..], - @tagName(mir_ops[0]), - @tagName(mir_ops[1]), - @tagName(mir_ops[2]), - @tagName(mir_ops[3]), - }, - ); - }, - else => |e| return e, - }; - return; + } else { + log.err("failed to select:", .{}); + for (src_temps) |src_temp| log.err("{}", .{src_temp.tracking(cg)}); + return cg.fail("failed to select", .{}); } - log.err("failed to select:", .{}); - for (src_temps) |src_temp| log.err("{}", .{src_temp.tracking(self)}); - return self.fail("failed to select", .{}); + for (extra_temps) |extra_temp| if (extra_temp) |temp| try temp.die(cg); } diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index ac5181cb3e..19881e0dee 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -474,13 +474,15 @@ pub const Memory = struct { }; pub const Mod = union(enum(u1)) { - rm: struct { + rm: Rm, + off: u64, + + pub const Rm = struct { size: Size, index: Register = .none, scale: Scale = .@"1", disp: i32 = 0, - }, - off: u64, + }; }; pub const Size = enum(u4) { From c4b93555b0d2c56cbbd85f77bb387ebb43d23369 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 1 Dec 2024 23:53:22 -0500 Subject: [PATCH 04/25] x86_64: testing --- src/Air.zig | 10 +- src/arch/x86_64/CodeGen.zig | 3029 +++++++++++++++++++++--------- src/arch/x86_64/Disassembler.zig | 2 +- src/arch/x86_64/Encoding.zig | 13 +- src/arch/x86_64/Mir.zig | 8 + src/arch/x86_64/abi.zig | 12 +- src/arch/x86_64/encoder.zig | 19 +- src/arch/x86_64/encodings.zig | 10 + src/register_manager.zig | 14 +- test/behavior.zig | 2 + test/behavior/x86_64.zig | 9 + test/behavior/x86_64/math.zig | 230 +++ 12 files changed, 2403 insertions(+), 955 deletions(-) create mode 100644 test/behavior/x86_64.zig create mode 100644 test/behavior/x86_64/math.zig diff --git a/src/Air.zig b/src/Air.zig index e76ffb3c39..81fb7af85e 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -1229,7 +1229,7 @@ pub const VectorCmp = struct { op: u32, pub fn compareOperator(self: VectorCmp) std.math.CompareOperator { - return @as(std.math.CompareOperator, @enumFromInt(@as(u3, @truncate(self.op)))); + return @enumFromInt(@as(u3, 
@intCast(self.op))); } pub fn encodeOp(compare_operator: std.math.CompareOperator) u32 { @@ -1274,11 +1274,11 @@ pub const Cmpxchg = struct { flags: u32, pub fn successOrder(self: Cmpxchg) std.builtin.AtomicOrder { - return @as(std.builtin.AtomicOrder, @enumFromInt(@as(u3, @truncate(self.flags)))); + return @enumFromInt(@as(u3, @truncate(self.flags))); } pub fn failureOrder(self: Cmpxchg) std.builtin.AtomicOrder { - return @as(std.builtin.AtomicOrder, @enumFromInt(@as(u3, @truncate(self.flags >> 3)))); + return @enumFromInt(@as(u3, @intCast(self.flags >> 3))); } }; @@ -1289,11 +1289,11 @@ pub const AtomicRmw = struct { flags: u32, pub fn ordering(self: AtomicRmw) std.builtin.AtomicOrder { - return @as(std.builtin.AtomicOrder, @enumFromInt(@as(u3, @truncate(self.flags)))); + return @enumFromInt(@as(u3, @truncate(self.flags))); } pub fn op(self: AtomicRmw) std.builtin.AtomicRmwOp { - return @as(std.builtin.AtomicRmwOp, @enumFromInt(@as(u4, @truncate(self.flags >> 3)))); + return @enumFromInt(@as(u4, @intCast(self.flags >> 3))); } }; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 9631173b9f..51e4c5ce68 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -135,6 +135,8 @@ const Owner = union(enum) { } }; +const MaskKind = enum { sign, all }; + pub const MCValue = union(enum) { /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. /// TODO Look into deleting this tag and using `dead` instead, since every use @@ -156,10 +158,16 @@ pub const MCValue = union(enum) { register: Register, /// The value is split across two registers. register_pair: [2]Register, + /// The value is split across three registers. + register_triple: [3]Register, + /// The value is split across four registers. + register_quadruple: [4]Register, /// The value is a constant offset from the value in a register. register_offset: bits.RegisterOffset, /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register. register_overflow: struct { reg: Register, eflags: Condition }, + /// The value is a bool vector stored in a vector register with a different scalar type. + register_mask: struct { reg: Register, kind: MaskKind, inverted: bool, scalar: Memory.Size }, /// The value is in memory at a hard-coded address. /// If the type is a pointer, it means the pointer address is stored at this memory location. memory: u64, @@ -195,7 +203,7 @@ pub const MCValue = union(enum) { /// Payload is a frame address. lea_frame: bits.FrameAddr, /// Supports integer_per_element abi - elementwise_regs_then_frame: packed struct { regs: u3 = 0, frame_off: i29 = 0, frame_index: FrameIndex }, + elementwise_regs_then_frame: packed struct { regs: u3, frame_off: i29, frame_index: FrameIndex }, /// This indicates that we have already allocated a frame index for this instruction, /// but it has not been spilled there yet in the current control flow. /// Payload is a frame index. 
@@ -210,6 +218,7 @@ pub const MCValue = union(enum) { .undef, .immediate, .register_offset, + .register_mask, .eflags, .register_overflow, .lea_symbol, @@ -223,6 +232,8 @@ pub const MCValue = union(enum) { => false, .register, .register_pair, + .register_triple, + .register_quadruple, .memory, .load_symbol, .load_got, @@ -234,13 +245,20 @@ pub const MCValue = union(enum) { }; } - fn isMemory(mcv: MCValue) bool { + fn isBase(mcv: MCValue) bool { return switch (mcv) { .memory, .indirect, .load_frame => true, else => false, }; } + fn isMemory(mcv: MCValue) bool { + return switch (mcv) { + .memory, .indirect, .load_frame, .load_symbol => true, + else => false, + }; + } + fn isImmediate(mcv: MCValue) bool { return switch (mcv) { .immediate => true, @@ -268,16 +286,23 @@ pub const MCValue = union(enum) { .register => |reg| reg, .register_offset, .indirect => |ro| ro.reg, .register_overflow => |ro| ro.reg, + .register_mask => |rm| rm.reg, else => null, }; } fn getRegs(mcv: *const MCValue) []const Register { return switch (mcv.*) { - .register => |*reg| @as(*const [1]Register, reg), - .register_pair => |*regs| regs, - .register_offset, .indirect => |*ro| @as(*const [1]Register, &ro.reg), - .register_overflow => |*ro| @as(*const [1]Register, &ro.reg), + .register => |*reg| reg[0..1], + inline .register_pair, + .register_triple, + .register_quadruple, + => |*regs| regs, + inline .register_offset, + .indirect, + .register_overflow, + .register_mask, + => |*pl| (&pl.reg)[0..1], else => &.{}, }; } @@ -300,8 +325,11 @@ pub const MCValue = union(enum) { .eflags, .register, .register_pair, + .register_triple, + .register_quadruple, .register_offset, .register_overflow, + .register_mask, .lea_symbol, .lea_direct, .lea_got, @@ -332,7 +360,10 @@ pub const MCValue = union(enum) { .undef, .eflags, .register_pair, + .register_triple, + .register_quadruple, .register_overflow, + .register_mask, .memory, .indirect, .load_direct, @@ -367,7 +398,10 @@ pub const MCValue = union(enum) { => unreachable, // not valid .eflags, .register_pair, + .register_triple, + .register_quadruple, .register_overflow, + .register_mask, .memory, .indirect, .load_direct, @@ -404,8 +438,11 @@ pub const MCValue = union(enum) { .eflags, .register, .register_pair, + .register_triple, + .register_quadruple, .register_offset, .register_overflow, + .register_mask, .load_direct, .lea_direct, .load_got, @@ -472,10 +509,11 @@ pub const MCValue = union(enum) { .memory => |pl| try writer.print("[ds:0x{x}]", .{pl}), inline .eflags, .register => |pl| try writer.print("{s}", .{@tagName(pl)}), .register_pair => |pl| try writer.print("{s}:{s}", .{ @tagName(pl[1]), @tagName(pl[0]) }), + .register_triple => |pl| try writer.print("{s}:{s}:{s}", .{ @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]) }), + .register_quadruple => |pl| try writer.print("{s}:{s}:{s}:{s}", .{ @tagName(pl[3]), @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]) }), .register_offset => |pl| try writer.print("{s} + 0x{x}", .{ @tagName(pl.reg), pl.off }), - .register_overflow => |pl| try writer.print("{s}:{s}", .{ - @tagName(pl.eflags), @tagName(pl.reg), - }), + .register_overflow => |pl| try writer.print("{s}:{s}", .{ @tagName(pl.eflags), @tagName(pl.reg) }), + .register_mask => |pl| try writer.print("mask({s},{}):{s}", .{ @tagName(pl.kind), pl.scalar, @tagName(pl.reg) }), .load_symbol => |pl| try writer.print("[sym:{} + 0x{x}]", .{ pl.sym_index, pl.off }), .lea_symbol => |pl| try writer.print("sym:{} + 0x{x}", .{ pl.sym_index, pl.off }), .indirect => |pl| try writer.print("[{s} + 
0x{x}]", .{ @tagName(pl.reg), pl.off }), @@ -526,8 +564,11 @@ const InstTracking = struct { .eflags, .register, .register_pair, + .register_triple, + .register_quadruple, .register_offset, .register_overflow, + .register_mask, .indirect, => .none, }, .short = result }; @@ -545,17 +586,17 @@ const InstTracking = struct { return self.short.getCondition(); } - fn spill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { + fn spill(self: *InstTracking, cg: *CodeGen, inst: Air.Inst.Index) !void { if (std.meta.eql(self.long, self.short)) return; // Already spilled // Allocate or reuse frame index switch (self.long) { - .none => self.long = try function.allocRegOrMem(inst, false), + .none => self.long = try cg.allocRegOrMem(inst, false), .load_frame => {}, .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } }, else => unreachable, } tracking_log.debug("spill {} from {} to {}", .{ inst, self.short, self.long }); - try function.genCopy(function.typeOfIndex(inst), self.long, self.short, .{}); + try cg.genCopy(cg.typeOfIndex(inst), self.long, self.short, .{}); } fn reuseFrame(self: *InstTracking) void { @@ -584,8 +625,11 @@ const InstTracking = struct { .eflags, .register, .register_pair, + .register_triple, + .register_quadruple, .register_offset, .register_overflow, + .register_mask, .indirect, .elementwise_regs_then_frame, .reserved_frame, @@ -630,8 +674,11 @@ const InstTracking = struct { .eflags, .register, .register_pair, + .register_triple, + .register_quadruple, .register_offset, .register_overflow, + .register_mask, .indirect, .elementwise_regs_then_frame, .air_ref, @@ -735,13 +782,13 @@ const InstTracking = struct { } pub fn format( - self: InstTracking, + tracking: InstTracking, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype, ) @TypeOf(writer).Error!void { - if (!std.meta.eql(self.long, self.short)) try writer.print("|{}| ", .{self.long}); - try writer.print("{}", .{self.short}); + if (!std.meta.eql(tracking.long, tracking.short)) try writer.print("|{}| ", .{tracking.long}); + try writer.print("{}", .{tracking.short}); } }; @@ -2259,44 +2306,44 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try self.asmPseudo(.pseudo_dbg_leave_block_none); } -fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - const pt = self.pt; +fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { + const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; - const air_tags = self.air.instructions.items(.tag); - const air_datas = self.air.instructions.items(.data); - const use_old = self.target.ofmt == .coff; + const air_tags = cg.air.instructions.items(.tag); + const air_datas = cg.air.instructions.items(.data); + const use_old = cg.target.ofmt == .coff; - self.arg_index = 0; + cg.arg_index = 0; for (body) |inst| switch (air_tags[@intFromEnum(inst)]) { .arg => { - wip_mir_log.debug("{}", .{self.fmtAir(inst)}); - verbose_tracking_log.debug("{}", .{self.fmtTracking()}); + wip_mir_log.debug("{}", .{cg.fmtAir(inst)}); + verbose_tracking_log.debug("{}", .{cg.fmtTracking()}); - self.reused_operands = .initEmpty(); - try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); + cg.reused_operands = .initEmpty(); + try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1); - try self.airArg(inst); + try cg.airArg(inst); - self.resetTemps(); - self.checkInvariantsAfterAirInst(); + cg.resetTemps(); + cg.checkInvariantsAfterAirInst(); }, else => break, }; - if (self.arg_index == 0) try 
self.airDbgVarArgs(); - self.arg_index = 0; + if (cg.arg_index == 0) try cg.airDbgVarArgs(); + cg.arg_index = 0; for (body) |inst| { - if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue; - wip_mir_log.debug("{}", .{self.fmtAir(inst)}); - verbose_tracking_log.debug("{}", .{self.fmtTracking()}); + if (cg.liveness.isUnused(inst) and !cg.air.mustLower(inst, ip)) continue; + wip_mir_log.debug("{}", .{cg.fmtAir(inst)}); + verbose_tracking_log.debug("{}", .{cg.fmtTracking()}); - self.reused_operands = .initEmpty(); - try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); + cg.reused_operands = .initEmpty(); + try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off .not, - => |tag| try self.airUnOp(inst, tag), + => |air_tag| try cg.airUnOp(inst, air_tag), .add, .add_wrap, @@ -2306,22 +2353,22 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .bool_or, .min, .max, - => |tag| try self.airBinOp(inst, tag), + => |air_tag| try cg.airBinOp(inst, air_tag), - .ptr_add, .ptr_sub => |tag| try self.airPtrArithmetic(inst, tag), + .ptr_add, .ptr_sub => |air_tag| try cg.airPtrArithmetic(inst, air_tag), - .shr, .shr_exact => try self.airShlShrBinOp(inst), - .shl, .shl_exact => try self.airShlShrBinOp(inst), + .shr, .shr_exact => try cg.airShlShrBinOp(inst), + .shl, .shl_exact => try cg.airShlShrBinOp(inst), - .mul => try self.airMulDivBinOp(inst), - .mul_wrap => try self.airMulDivBinOp(inst), - .rem => try self.airMulDivBinOp(inst), - .mod => try self.airMulDivBinOp(inst), + .mul => try cg.airMulDivBinOp(inst), + .mul_wrap => try cg.airMulDivBinOp(inst), + .rem => try cg.airMulDivBinOp(inst), + .mod => try cg.airMulDivBinOp(inst), - .add_sat => try self.airAddSat(inst), - .sub_sat => try self.airSubSat(inst), - .mul_sat => try self.airMulSat(inst), - .shl_sat => try self.airShlSat(inst), + .add_sat => try cg.airAddSat(inst), + .sub_sat => try cg.airSubSat(inst), + .mul_sat => try cg.airMulSat(inst), + .shl_sat => try cg.airShlSat(inst), .sin, .cos, @@ -2332,98 +2379,97 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .log2, .log10, .round, - => |tag| try self.airUnaryMath(inst, tag), + => |air_tag| try cg.airUnaryMath(inst, air_tag), - .floor => try self.airRound(inst, .{ .mode = .down, .precision = .inexact }), - .ceil => try self.airRound(inst, .{ .mode = .up, .precision = .inexact }), - .trunc_float => try self.airRound(inst, .{ .mode = .zero, .precision = .inexact }), - .sqrt => try self.airSqrt(inst), - .neg => try self.airFloatSign(inst), + .floor => try cg.airRound(inst, .{ .mode = .down, .precision = .inexact }), + .ceil => try cg.airRound(inst, .{ .mode = .up, .precision = .inexact }), + .trunc_float => try cg.airRound(inst, .{ .mode = .zero, .precision = .inexact }), + .sqrt => try cg.airSqrt(inst), + .neg => try cg.airFloatSign(inst), - .abs => try self.airAbs(inst), + .abs => try cg.airAbs(inst), - .add_with_overflow => try self.airAddSubWithOverflow(inst), - .sub_with_overflow => try self.airAddSubWithOverflow(inst), - .mul_with_overflow => try self.airMulWithOverflow(inst), - .shl_with_overflow => try self.airShlWithOverflow(inst), + .add_with_overflow => try cg.airAddSubWithOverflow(inst), + .sub_with_overflow => try cg.airAddSubWithOverflow(inst), + .mul_with_overflow => try cg.airMulWithOverflow(inst), + .shl_with_overflow => try cg.airShlWithOverflow(inst), - .div_float, .div_trunc, .div_floor, .div_exact => try self.airMulDivBinOp(inst), + .div_float, 
.div_trunc, .div_floor, .div_exact => try cg.airMulDivBinOp(inst), - .cmp_lt => try self.airCmp(inst, .lt), - .cmp_lte => try self.airCmp(inst, .lte), - .cmp_eq => try self.airCmp(inst, .eq), - .cmp_gte => try self.airCmp(inst, .gte), - .cmp_gt => try self.airCmp(inst, .gt), - .cmp_neq => try self.airCmp(inst, .neq), + .cmp_lt => try cg.airCmp(inst, .lt), + .cmp_lte => try cg.airCmp(inst, .lte), + .cmp_eq => try cg.airCmp(inst, .eq), + .cmp_gte => try cg.airCmp(inst, .gte), + .cmp_gt => try cg.airCmp(inst, .gt), + .cmp_neq => try cg.airCmp(inst, .neq), - .cmp_vector => try self.airCmpVector(inst), - .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst), + .cmp_lt_errors_len => try cg.airCmpLtErrorsLen(inst), - .bitcast => try self.airBitCast(inst), - .fptrunc => try self.airFptrunc(inst), - .fpext => try self.airFpext(inst), - .intcast => try self.airIntCast(inst), - .trunc => try self.airTrunc(inst), - .is_non_null => try self.airIsNonNull(inst), - .is_null => try self.airIsNull(inst), - .is_non_err => try self.airIsNonErr(inst), - .is_err => try self.airIsErr(inst), - .load => try self.airLoad(inst), - .store => try self.airStore(inst, false), - .store_safe => try self.airStore(inst, true), - .struct_field_val => try self.airStructFieldVal(inst), - .float_from_int => try self.airFloatFromInt(inst), - .int_from_float => try self.airIntFromFloat(inst), - .cmpxchg_strong => try self.airCmpxchg(inst), - .cmpxchg_weak => try self.airCmpxchg(inst), - .atomic_rmw => try self.airAtomicRmw(inst), - .atomic_load => try self.airAtomicLoad(inst), - .memcpy => try self.airMemcpy(inst), - .memset => try self.airMemset(inst, false), - .memset_safe => try self.airMemset(inst, true), - .set_union_tag => try self.airSetUnionTag(inst), - .get_union_tag => try self.airGetUnionTag(inst), - .clz => try self.airClz(inst), - .ctz => try self.airCtz(inst), - .popcount => try self.airPopCount(inst), - .byte_swap => try self.airByteSwap(inst), - .bit_reverse => try self.airBitReverse(inst), - .tag_name => try self.airTagName(inst), - .error_name => try self.airErrorName(inst), - .splat => try self.airSplat(inst), - .select => try self.airSelect(inst), - .shuffle => try self.airShuffle(inst), - .reduce => try self.airReduce(inst), - .aggregate_init => try self.airAggregateInit(inst), - .union_init => try self.airUnionInit(inst), - .prefetch => try self.airPrefetch(inst), - .mul_add => try self.airMulAdd(inst), + .bitcast => try cg.airBitCast(inst), + .fptrunc => try cg.airFptrunc(inst), + .fpext => try cg.airFpext(inst), + .intcast => try cg.airIntCast(inst), + .trunc => try cg.airTrunc(inst), + .is_non_null => try cg.airIsNonNull(inst), + .is_null => try cg.airIsNull(inst), + .is_non_err => try cg.airIsNonErr(inst), + .is_err => try cg.airIsErr(inst), + .load => try cg.airLoad(inst), + .store => try cg.airStore(inst, false), + .store_safe => try cg.airStore(inst, true), + .struct_field_val => try cg.airStructFieldVal(inst), + .float_from_int => try cg.airFloatFromInt(inst), + .int_from_float => try cg.airIntFromFloat(inst), + .cmpxchg_strong => try cg.airCmpxchg(inst), + .cmpxchg_weak => try cg.airCmpxchg(inst), + .atomic_rmw => try cg.airAtomicRmw(inst), + .atomic_load => try cg.airAtomicLoad(inst), + .memcpy => try cg.airMemcpy(inst), + .memset => try cg.airMemset(inst, false), + .memset_safe => try cg.airMemset(inst, true), + .set_union_tag => try cg.airSetUnionTag(inst), + .get_union_tag => try cg.airGetUnionTag(inst), + .clz => try cg.airClz(inst), + .ctz => try cg.airCtz(inst), + .popcount => try 
cg.airPopCount(inst), + .byte_swap => try cg.airByteSwap(inst), + .bit_reverse => try cg.airBitReverse(inst), + .tag_name => try cg.airTagName(inst), + .error_name => try cg.airErrorName(inst), + .splat => try cg.airSplat(inst), + .select => try cg.airSelect(inst), + .shuffle => try cg.airShuffle(inst), + .reduce => try cg.airReduce(inst), + .aggregate_init => try cg.airAggregateInit(inst), + .union_init => try cg.airUnionInit(inst), + .prefetch => try cg.airPrefetch(inst), + .mul_add => try cg.airMulAdd(inst), - .atomic_store_unordered => try self.airAtomicStore(inst, .unordered), - .atomic_store_monotonic => try self.airAtomicStore(inst, .monotonic), - .atomic_store_release => try self.airAtomicStore(inst, .release), - .atomic_store_seq_cst => try self.airAtomicStore(inst, .seq_cst), + .atomic_store_unordered => try cg.airAtomicStore(inst, .unordered), + .atomic_store_monotonic => try cg.airAtomicStore(inst, .monotonic), + .atomic_store_release => try cg.airAtomicStore(inst, .release), + .atomic_store_seq_cst => try cg.airAtomicStore(inst, .seq_cst), - .array_elem_val => try self.airArrayElemVal(inst), - .slice_elem_val => try self.airSliceElemVal(inst), - .ptr_elem_val => try self.airPtrElemVal(inst), + .array_elem_val => try cg.airArrayElemVal(inst), + .slice_elem_val => try cg.airSliceElemVal(inst), + .ptr_elem_val => try cg.airPtrElemVal(inst), - .optional_payload => try self.airOptionalPayload(inst), - .unwrap_errunion_err => try self.airUnwrapErrUnionErr(inst), - .unwrap_errunion_payload => try self.airUnwrapErrUnionPayload(inst), - .err_return_trace => try self.airErrReturnTrace(inst), - .set_err_return_trace => try self.airSetErrReturnTrace(inst), - .save_err_return_trace_index=> try self.airSaveErrReturnTraceIndex(inst), + .optional_payload => try cg.airOptionalPayload(inst), + .unwrap_errunion_err => try cg.airUnwrapErrUnionErr(inst), + .unwrap_errunion_payload => try cg.airUnwrapErrUnionPayload(inst), + .err_return_trace => try cg.airErrReturnTrace(inst), + .set_err_return_trace => try cg.airSetErrReturnTrace(inst), + .save_err_return_trace_index=> try cg.airSaveErrReturnTraceIndex(inst), - .wrap_optional => try self.airWrapOptional(inst), - .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), - .wrap_errunion_err => try self.airWrapErrUnionErr(inst), + .wrap_optional => try cg.airWrapOptional(inst), + .wrap_errunion_payload => try cg.airWrapErrUnionPayload(inst), + .wrap_errunion_err => try cg.airWrapErrUnionErr(inst), // zig fmt: on .add_safe, .sub_safe, .mul_safe, - => return self.fail("TODO implement safety_checked_instructions", .{}), + => return cg.fail("TODO implement safety_checked_instructions", .{}), .add_optimized, .sub_optimized, .mul_optimized, @@ -2440,214 +2486,612 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .cmp_gte_optimized, .cmp_gt_optimized, .cmp_neq_optimized, - .cmp_vector_optimized, .reduce_optimized, .int_from_float_optimized, - => return self.fail("TODO implement optimized float mode", .{}), + => return cg.fail("TODO implement optimized float mode", .{}), - .arg => try self.airDbgArg(inst), - .alloc => if (use_old) try self.airAlloc(inst) else { - var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{ - .index = try self.allocMemPtr(inst), + .arg => try cg.airDbgArg(inst), + .alloc => if (use_old) try cg.airAlloc(inst) else { + var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{ + .index = try cg.allocMemPtr(inst), } }); - try slot.moveTo(inst, self); + try 
slot.moveTo(inst, cg); }, .inferred_alloc => unreachable, .inferred_alloc_comptime => unreachable, - .ret_ptr => if (use_old) try self.airRetPtr(inst) else { - var slot = switch (self.ret_mcv.long) { + .ret_ptr => if (use_old) try cg.airRetPtr(inst) else { + var slot = switch (cg.ret_mcv.long) { else => unreachable, - .none => try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{ - .index = try self.allocMemPtr(inst), + .none => try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{ + .index = try cg.allocMemPtr(inst), } }), .load_frame => slot: { - var slot = try self.tempFromValue(self.typeOfIndex(inst), self.ret_mcv.long); - try slot.toOffset(self.ret_mcv.short.indirect.off, self); + var slot = try cg.tempFromValue(cg.typeOfIndex(inst), cg.ret_mcv.long); + try slot.toOffset(cg.ret_mcv.short.indirect.off, cg); break :slot slot; }, }; - try slot.moveTo(inst, self); + try slot.moveTo(inst, cg); }, - .assembly => try self.airAsm(inst), - inline .bit_and, .bit_or, .xor => |tag| if (use_old) try self.airBinOp(inst, tag) else { + .assembly => try cg.airAsm(inst), + .bit_and, .bit_or, .xor => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; - var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); - try self.spillEflagsIfOccupied(); - const mir_tag: Mir.Inst.Tag = switch (tag) { - else => unreachable, - .bit_and => .@"and", - .bit_or => .@"or", - .xor => .xor, - }; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - try self.select(&res, &.{ &ops[0], &ops[1] }, &.{ - .{ - .required_features = &.{.avx2}, - .mir_tag = .{ .vp_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .ymm, .ymm, .mem } }, - .{ .ops = &.{ .ymm, .mem, .ymm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm, .ymm, .ymm } }, - }, - }, - .{ - .required_features = &.{.avx}, - .mir_tag = .{ .vp_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm, .xmm, .mem } }, - .{ .ops = &.{ .xmm, .mem, .xmm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm, .xmm, .xmm } }, - }, - }, - .{ - .required_features = &.{.sse2}, - .mir_tag = .{ .p_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .xmm, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .xmm } }, - .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .mir_tag = .{ .p_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .mm, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mm } }, - .{ .ops = &.{ .mm, .mm, .{ .implicit = 0 } } }, - }, - }, - .{ - .mir_tag = .{ ._, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } }, - .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } }, - .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } }, - .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } }, - .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } }, - }, - }, + try cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, pattern_sets: switch (air_tag) { + else => unreachable, + inline .bit_and, .bit_or, .xor => |ct_air_tag| { + const mir_tag: Mir.Inst.Tag = switch (ct_air_tag) { + else => unreachable, + 
.bit_and => .@"and", + .bit_or => .@"or", + .xor => .xor, + }; + break :pattern_sets &.{ + .{ + .required_features = &.{.avx2}, + .mir_tag = .{ .vp_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .ymm, .ymm, .mem } }, + .{ .ops = &.{ .ymm, .mem, .ymm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm, .ymm, .ymm } }, + }, + }, + .{ + .required_features = &.{.avx}, + .mir_tag = .{ .vp_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm, .xmm, .mem } }, + .{ .ops = &.{ .xmm, .mem, .xmm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm, .xmm, .xmm } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .mir_tag = .{ .p_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .xmm, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .xmm } }, + .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .mir_tag = .{ .p_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .mm, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mm } }, + .{ .ops = &.{ .mm, .mm, .{ .implicit = 0 } } }, + }, + }, + .{ + .clobbers = .{ .eflags = true }, + .mir_tag = .{ ._, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } }, + .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } }, + .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } }, + .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } }, + .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } }, + }, + }, - .{ - .required_features = &.{.avx2}, - .loop = .bitwise, - .mir_tag = .{ .vp_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .ymm_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_limb, .ymm_limb, .mem_limb } }, - }, + .{ + .required_features = &.{.avx2}, + .loop = .bitwise, + .mir_tag = .{ .vp_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .ymm_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_limb, .ymm_limb, .ymm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .loop = .bitwise, + .mir_tag = .{ .vp_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .loop = .bitwise, + .mir_tag = .{ .p_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .loop = .bitwise, + .mir_tag = .{ .p_, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .mm_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .mm_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_limb, .{ .implicit = 0 }, .mm_limb } }, + }, + }, + .{ + .clobbers = .{ .eflags = true }, + .loop = .bitwise, + .mir_tag = .{ ._, mir_tag }, + 
.patterns = &.{ + .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } }, + .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, + }, + }, + }; }, - .{ - .required_features = &.{.avx}, - .loop = .bitwise, - .mir_tag = .{ .vp_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_limb, .xmm_limb, .mem_limb } }, - }, - }, - .{ - .required_features = &.{.sse2}, - .loop = .bitwise, - .mir_tag = .{ .p_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .implicit = 0 } } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .loop = .bitwise, - .mir_tag = .{ .p_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .mm_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .mm_limb, .mem_limb, .{ .implicit = 0 } } }, - }, - }, - .{ - .loop = .bitwise, - .mir_tag = .{ ._, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } }, - .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, - }, - }, - }); - if (ops[0].index != res[0].index) try ops[0].die(self); - if (ops[1].index != res[0].index) try ops[1].die(self); - try res[0].moveTo(inst, self); + }, .{}); + if (ops[0].index != res[0].index) try ops[0].die(cg); + if (ops[1].index != res[0].index) try ops[1].die(cg); + try res[0].moveTo(inst, cg); }, - .block => if (use_old) try self.airBlock(inst) else { + .block => if (use_old) try cg.airBlock(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.Block, ty_pl.payload); - try self.asmPseudo(.pseudo_dbg_enter_block_none); - try self.lowerBlock(inst, @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len])); - try self.asmPseudo(.pseudo_dbg_leave_block_none); + const extra = cg.air.extraData(Air.Block, ty_pl.payload); + try cg.asmPseudo(.pseudo_dbg_enter_block_none); + try cg.lowerBlock(inst, @ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len])); + try cg.asmPseudo(.pseudo_dbg_leave_block_none); }, - .loop => if (use_old) try self.airLoop(inst) else { + .loop => if (use_old) try cg.airLoop(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.Block, ty_pl.payload); - self.scope_generation += 1; - try self.loops.putNoClobber(self.gpa, inst, .{ - .state = try self.saveState(), - .target = @intCast(self.mir_instructions.len), + const extra = cg.air.extraData(Air.Block, ty_pl.payload); + cg.scope_generation += 1; + try cg.loops.putNoClobber(cg.gpa, inst, .{ + .state = try cg.saveState(), + .target = @intCast(cg.mir_instructions.len), }); - defer assert(self.loops.remove(inst)); - try self.genBodyBlock(@ptrCast(self.air.extra[extra.end..][0..extra.data.body_len])); + defer assert(cg.loops.remove(inst)); + try cg.genBodyBlock(@ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len])); }, - .repeat => if (use_old) try self.airRepeat(inst) else { + .repeat => if (use_old) try cg.airRepeat(inst) else { const repeat = air_datas[@intFromEnum(inst)].repeat; - const loop = 
self.loops.get(repeat.loop_inst).?; - try self.restoreState(loop.state, &.{}, .{ + const loop = cg.loops.get(repeat.loop_inst).?; + try cg.restoreState(loop.state, &.{}, .{ .emit_instructions = true, .update_tracking = false, .resurrect = false, .close_scope = true, }); - _ = try self.asmJmpReloc(loop.target); + _ = try cg.asmJmpReloc(loop.target); }, - .br => try self.airBr(inst), - .trap => try self.asmOpOnly(.{ ._, .ud2 }), - .breakpoint => try self.asmOpOnly(.{ ._, .int3 }), - .ret_addr => if (use_old) try self.airRetAddr(inst) else { - var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .load_frame = .{ + .br => try cg.airBr(inst), + .trap => try cg.asmOpOnly(.{ ._, .ud2 }), + .breakpoint => try cg.asmOpOnly(.{ ._, .int3 }), + .ret_addr => if (use_old) try cg.airRetAddr(inst) else { + var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .load_frame = .{ .index = .ret_addr, } }); - while (try slot.toAnyReg(self)) {} - try slot.moveTo(inst, self); + while (try slot.toAnyReg(cg)) {} + try slot.moveTo(inst, cg); }, - .frame_addr => if (use_old) try self.airFrameAddress(inst) else { - var slot = try self.tempFromValue(self.typeOfIndex(inst), .{ .lea_frame = .{ + .frame_addr => if (use_old) try cg.airFrameAddress(inst) else { + var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{ .index = .base_ptr, } }); - try slot.moveTo(inst, self); + try slot.moveTo(inst, cg); }, - .call => try self.airCall(inst, .auto), - .call_always_tail => try self.airCall(inst, .always_tail), - .call_never_tail => try self.airCall(inst, .never_tail), - .call_never_inline => try self.airCall(inst, .never_inline), + .call => try cg.airCall(inst, .auto), + .call_always_tail => try cg.airCall(inst, .always_tail), + .call_never_tail => try cg.airCall(inst, .never_tail), + .call_never_inline => try cg.airCall(inst, .never_inline), - .cond_br => try self.airCondBr(inst), - .switch_br => try self.airSwitchBr(inst), - .loop_switch_br => try self.airLoopSwitchBr(inst), - .switch_dispatch => try self.airSwitchDispatch(inst), - .@"try", .try_cold => try self.airTry(inst), - .try_ptr, .try_ptr_cold => try self.airTryPtr(inst), - .dbg_stmt => if (use_old) try self.airDbgStmt(inst) else { + .cmp_vector, .cmp_vector_optimized => if (use_old) try cg.airCmpVector(inst) else fallback: { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data; + switch (extra.compareOperator()) { + .eq, .neq => {}, + else => break :fallback try cg.airCmpVector(inst), + } + var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs }); + var res: [1]Temp = undefined; + switch (extra.compareOperator()) { + .lt => unreachable, + .lte => unreachable, + .eq, .neq => |cmp_op| try cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, &.{ + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .byte }, + .mir_tag = .{ .vp_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask, .ymm, .mem } }, + .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask, .ymm, .ymm } }, + }, + }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .word }, + .mir_tag = .{ .vp_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask, .ymm, .mem } }, + .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask, .ymm, .ymm } }, + }, + }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .dword }, + .mir_tag = .{ .vp_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask, .ymm, .mem } 
}, + .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask, .ymm, .ymm } }, + }, + }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .qword }, + .mir_tag = .{ .vp_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask, .ymm, .mem } }, + .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask, .ymm, .ymm } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .byte }, + .mir_tag = .{ .vp_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask, .xmm, .mem } }, + .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask, .xmm, .xmm } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .word }, + .mir_tag = .{ .vp_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask, .xmm, .mem } }, + .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask, .xmm, .xmm } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .dword }, + .mir_tag = .{ .vp_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask, .xmm, .mem } }, + .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask, .xmm, .xmm } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .qword }, + .mir_tag = .{ .vp_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask, .xmm, .mem } }, + .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask, .xmm, .xmm } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .byte }, + .mir_tag = .{ .p_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } }, + .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .word }, + .mir_tag = .{ .p_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } }, + .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .dword }, + .mir_tag = .{ .p_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } }, + .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.sse4_1}, + .scalar = .{ .any_int = .qword }, + .mir_tag = .{ .p_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } }, + .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .byte }, + .mir_tag = .{ .p_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .mm_mask, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } }, + .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .word }, + .mir_tag = .{ .p_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ 
.mm_mask, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } }, + .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .dword }, + .mir_tag = .{ .p_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .mm_mask, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } }, + .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } }, + }, + }, + .{ + .scalar = .bool, + .clobbers = .{ .eflags = true }, + .invert_result = true, + .mir_tag = .{ ._, .xor }, + .patterns = &.{ + .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } }, + .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } }, + .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } }, + .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } }, + .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } }, + }, + }, + + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .byte }, + .loop = .elementwise, + .mir_tag = .{ .vp_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, + }, + }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .word }, + .loop = .elementwise, + .mir_tag = .{ .vp_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, + }, + }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .dword }, + .loop = .elementwise, + .mir_tag = .{ .vp_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, + }, + }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .qword }, + .loop = .elementwise, + .mir_tag = .{ .vp_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .byte }, + .loop = .elementwise, + .mir_tag = .{ .vp_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .word }, + .loop = .elementwise, + .mir_tag = .{ .vp_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, 
.mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .dword }, + .loop = .elementwise, + .mir_tag = .{ .vp_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .qword }, + .loop = .elementwise, + .mir_tag = .{ .vp_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .byte }, + .loop = .elementwise, + .mir_tag = .{ .p_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .word }, + .loop = .elementwise, + .mir_tag = .{ .p_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .dword }, + .loop = .elementwise, + .mir_tag = .{ .p_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse4_1}, + .scalar = .{ .any_int = .qword }, + .loop = .elementwise, + .mir_tag = .{ .p_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .byte }, + .loop = .elementwise, + .mir_tag = .{ .p_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .word }, + .loop = .elementwise, + .mir_tag = .{ .p_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .dword }, + .loop = .elementwise, + .mir_tag = .{ .p_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask_limb, .{ 
.implicit = 0 }, .mm_limb } }, + }, + }, + .{ + .scalar = .bool, + .clobbers = .{ .eflags = true }, + .invert_result = true, + .loop = .elementwise, + .mir_tag = .{ ._, .xor }, + .patterns = &.{ + .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } }, + .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, + }, + }, + }, .{ .invert_result = switch (cmp_op) { + .eq => false, + .neq => true, + else => unreachable, + } }), + .gte => unreachable, + .gt => unreachable, + } + if (ops[0].index != res[0].index) try ops[0].die(cg); + if (ops[1].index != res[0].index) try ops[1].die(cg); + try res[0].moveTo(inst, cg); + }, + + .cond_br => try cg.airCondBr(inst), + .switch_br => try cg.airSwitchBr(inst), + .loop_switch_br => try cg.airLoopSwitchBr(inst), + .switch_dispatch => try cg.airSwitchDispatch(inst), + .@"try", .try_cold => try cg.airTry(inst), + .try_ptr, .try_ptr_cold => try cg.airTryPtr(inst), + .dbg_stmt => if (use_old) try cg.airDbgStmt(inst) else { const dbg_stmt = air_datas[@intFromEnum(inst)].dbg_stmt; - _ = try self.addInst(.{ + _ = try cg.addInst(.{ .tag = .pseudo, .ops = .pseudo_dbg_line_line_column, .data = .{ .line_column = .{ @@ -2656,50 +3100,50 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }); }, - .dbg_empty_stmt => if (use_old) try self.airDbgEmptyStmt() else { - if (self.mir_instructions.len > 0) { - const prev_mir_op = &self.mir_instructions.items(.ops)[self.mir_instructions.len - 1]; + .dbg_empty_stmt => if (use_old) try cg.airDbgEmptyStmt() else { + if (cg.mir_instructions.len > 0) { + const prev_mir_op = &cg.mir_instructions.items(.ops)[cg.mir_instructions.len - 1]; if (prev_mir_op.* == .pseudo_dbg_line_stmt_line_column) prev_mir_op.* = .pseudo_dbg_line_line_column; } - try self.asmOpOnly(.{ ._, .nop }); + try cg.asmOpOnly(.{ ._, .nop }); }, - .dbg_inline_block => if (use_old) try self.airDbgInlineBlock(inst) else { + .dbg_inline_block => if (use_old) try cg.airDbgInlineBlock(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.DbgInlineBlock, ty_pl.payload); - const old_inline_func = self.inline_func; - defer self.inline_func = old_inline_func; - self.inline_func = extra.data.func; - _ = try self.addInst(.{ + const extra = cg.air.extraData(Air.DbgInlineBlock, ty_pl.payload); + const old_inline_func = cg.inline_func; + defer cg.inline_func = old_inline_func; + cg.inline_func = extra.data.func; + _ = try cg.addInst(.{ .tag = .pseudo, .ops = .pseudo_dbg_enter_inline_func, .data = .{ .func = extra.data.func }, }); - try self.lowerBlock(inst, @ptrCast(self.air.extra[extra.end..][0..extra.data.body_len])); - _ = try self.addInst(.{ + try cg.lowerBlock(inst, @ptrCast(cg.air.extra[extra.end..][0..extra.data.body_len])); + _ = try cg.addInst(.{ .tag = .pseudo, .ops = .pseudo_dbg_leave_inline_func, .data = .{ .func = old_inline_func }, }); }, - .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => if (use_old) try self.airDbgVar(inst) else { + .dbg_var_ptr, .dbg_var_val, .dbg_arg_inline => if (use_old) try cg.airDbgVar(inst) else { const pl_op = air_datas[@intFromEnum(inst)].pl_op; - var ops = try self.tempsFromOperands(inst, .{pl_op.operand}); - try self.genLocalDebugInfo(inst, ops[0].tracking(self).short); - try ops[0].die(self); + var ops = try cg.tempsFromOperands(inst, .{pl_op.operand}); + try 
cg.genLocalDebugInfo(inst, ops[0].tracking(cg).short); + try ops[0].die(cg); }, - .is_null_ptr => if (use_old) try self.airIsNullPtr(inst) else { + .is_null_ptr => if (use_old) try cg.airIsNullPtr(inst) else { const un_op = air_datas[@intFromEnum(inst)].un_op; - const opt_ty = self.typeOf(un_op).childType(zcu); + const opt_ty = cg.typeOf(un_op).childType(zcu); const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu); const opt_child_ty = opt_ty.optionalChild(zcu); const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu)); - var ops = try self.tempsFromOperands(inst, .{un_op}); - if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, self); - while (try ops[0].toLea(self)) {} - try self.asmMemoryImmediate( + var ops = try cg.tempsFromOperands(inst, .{un_op}); + if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, cg); + while (try ops[0].toLea(cg)) {} + try cg.asmMemoryImmediate( .{ ._, .cmp }, - try ops[0].tracking(self).short.deref().mem(self, .{ .size = if (!opt_repr_is_pl) + try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = if (!opt_repr_is_pl) .byte else if (opt_child_ty.isSlice(zcu)) .qword @@ -2707,22 +3151,22 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .fromSize(opt_child_abi_size) }), .u(0), ); - var is_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e }); - try ops[0].die(self); - try is_null.moveTo(inst, self); + var is_null = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .e }); + try ops[0].die(cg); + try is_null.moveTo(inst, cg); }, - .is_non_null_ptr => if (use_old) try self.airIsNonNullPtr(inst) else { + .is_non_null_ptr => if (use_old) try cg.airIsNonNullPtr(inst) else { const un_op = air_datas[@intFromEnum(inst)].un_op; - const opt_ty = self.typeOf(un_op).childType(zcu); + const opt_ty = cg.typeOf(un_op).childType(zcu); const opt_repr_is_pl = opt_ty.optionalReprIsPayload(zcu); const opt_child_ty = opt_ty.optionalChild(zcu); const opt_child_abi_size: u31 = @intCast(opt_child_ty.abiSize(zcu)); - var ops = try self.tempsFromOperands(inst, .{un_op}); - if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, self); - while (try ops[0].toLea(self)) {} - try self.asmMemoryImmediate( + var ops = try cg.tempsFromOperands(inst, .{un_op}); + if (!opt_repr_is_pl) try ops[0].toOffset(opt_child_abi_size, cg); + while (try ops[0].toLea(cg)) {} + try cg.asmMemoryImmediate( .{ ._, .cmp }, - try ops[0].tracking(self).short.deref().mem(self, .{ .size = if (!opt_repr_is_pl) + try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = if (!opt_repr_is_pl) .byte else if (opt_child_ty.isSlice(zcu)) .qword @@ -2730,244 +3174,244 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .fromSize(opt_child_abi_size) }), .u(0), ); - var is_non_null = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne }); - try ops[0].die(self); - try is_non_null.moveTo(inst, self); + var is_non_null = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .ne }); + try ops[0].die(cg); + try is_non_null.moveTo(inst, cg); }, - .is_err_ptr => if (use_old) try self.airIsErrPtr(inst) else { + .is_err_ptr => if (use_old) try cg.airIsErrPtr(inst) else { const un_op = air_datas[@intFromEnum(inst)].un_op; - const eu_ty = self.typeOf(un_op).childType(zcu); + const eu_ty = cg.typeOf(un_op).childType(zcu); const eu_err_ty = eu_ty.errorUnionSet(zcu); const eu_pl_ty = eu_ty.errorUnionPayload(zcu); const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); - var ops = try 
self.tempsFromOperands(inst, .{un_op}); - try ops[0].toOffset(eu_err_off, self); - while (try ops[0].toLea(self)) {} - try self.asmMemoryImmediate( + var ops = try cg.tempsFromOperands(inst, .{un_op}); + try ops[0].toOffset(eu_err_off, cg); + while (try ops[0].toLea(cg)) {} + try cg.asmMemoryImmediate( .{ ._, .cmp }, - try ops[0].tracking(self).short.deref().mem(self, .{ .size = self.memSize(eu_err_ty) }), + try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }), .u(0), ); - var is_err = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .ne }); - try ops[0].die(self); - try is_err.moveTo(inst, self); + var is_err = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .ne }); + try ops[0].die(cg); + try is_err.moveTo(inst, cg); }, - .is_non_err_ptr => if (use_old) try self.airIsNonErrPtr(inst) else { + .is_non_err_ptr => if (use_old) try cg.airIsNonErrPtr(inst) else { const un_op = air_datas[@intFromEnum(inst)].un_op; - const eu_ty = self.typeOf(un_op).childType(zcu); + const eu_ty = cg.typeOf(un_op).childType(zcu); const eu_err_ty = eu_ty.errorUnionSet(zcu); const eu_pl_ty = eu_ty.errorUnionPayload(zcu); const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); - var ops = try self.tempsFromOperands(inst, .{un_op}); - try ops[0].toOffset(eu_err_off, self); - while (try ops[0].toLea(self)) {} - try self.asmMemoryImmediate( + var ops = try cg.tempsFromOperands(inst, .{un_op}); + try ops[0].toOffset(eu_err_off, cg); + while (try ops[0].toLea(cg)) {} + try cg.asmMemoryImmediate( .{ ._, .cmp }, - try ops[0].tracking(self).short.deref().mem(self, .{ .size = self.memSize(eu_err_ty) }), + try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }), .u(0), ); - var is_non_err = try self.tempFromValue(self.typeOfIndex(inst), .{ .eflags = .e }); - try ops[0].die(self); - try is_non_err.moveTo(inst, self); + var is_non_err = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .e }); + try ops[0].die(cg); + try is_non_err.moveTo(inst, cg); }, - .int_from_ptr => if (use_old) try self.airIntFromPtr(inst) else { + .int_from_ptr => if (use_old) try cg.airIntFromPtr(inst) else { const un_op = air_datas[@intFromEnum(inst)].un_op; - var ops = try self.tempsFromOperands(inst, .{un_op}); - try ops[0].toLimb(0, self); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{un_op}); + try ops[0].toLimb(0, cg); + try ops[0].moveTo(inst, cg); }, - .int_from_bool => if (use_old) try self.airIntFromBool(inst) else { + .int_from_bool => if (use_old) try cg.airIntFromBool(inst) else { const un_op = air_datas[@intFromEnum(inst)].un_op; - var ops = try self.tempsFromOperands(inst, .{un_op}); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{un_op}); + try ops[0].moveTo(inst, cg); }, - .ret => try self.airRet(inst, false), - .ret_safe => try self.airRet(inst, true), - .ret_load => try self.airRetLoad(inst), + .ret => try cg.airRet(inst, false), + .ret_safe => try cg.airRet(inst, true), + .ret_load => try cg.airRetLoad(inst), .unreach => {}, - .optional_payload_ptr => if (use_old) try self.airOptionalPayloadPtr(inst) else { + .optional_payload_ptr => if (use_old) try cg.airOptionalPayloadPtr(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].moveTo(inst, cg); }, - .optional_payload_ptr_set => if (use_old) 
try self.airOptionalPayloadPtrSet(inst) else { + .optional_payload_ptr_set => if (use_old) try cg.airOptionalPayloadPtrSet(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - const opt_ty = self.typeOf(ty_op.operand).childType(zcu); - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); + const opt_ty = cg.typeOf(ty_op.operand).childType(zcu); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); if (!opt_ty.optionalReprIsPayload(zcu)) { const opt_child_ty = opt_ty.optionalChild(zcu); const opt_child_abi_size: i32 = @intCast(opt_child_ty.abiSize(zcu)); - try ops[0].toOffset(opt_child_abi_size, self); - var has_value = try self.tempFromValue(Type.bool, .{ .immediate = 1 }); - try ops[0].store(&has_value, self); - try has_value.die(self); - try ops[0].toOffset(-opt_child_abi_size, self); + try ops[0].toOffset(opt_child_abi_size, cg); + var has_value = try cg.tempFromValue(Type.bool, .{ .immediate = 1 }); + try ops[0].store(&has_value, cg); + try has_value.die(cg); + try ops[0].toOffset(-opt_child_abi_size, cg); } - try ops[0].moveTo(inst, self); + try ops[0].moveTo(inst, cg); }, - .unwrap_errunion_payload_ptr => if (use_old) try self.airUnwrapErrUnionPayloadPtr(inst) else { + .unwrap_errunion_payload_ptr => if (use_old) try cg.airUnwrapErrUnionPayloadPtr(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - const eu_ty = self.typeOf(ty_op.operand).childType(zcu); + const eu_ty = cg.typeOf(ty_op.operand).childType(zcu); const eu_pl_ty = eu_ty.errorUnionPayload(zcu); const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu)); - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(eu_pl_off, self); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(eu_pl_off, cg); + try ops[0].moveTo(inst, cg); }, - .unwrap_errunion_err_ptr => if (use_old) try self.airUnwrapErrUnionErrPtr(inst) else { + .unwrap_errunion_err_ptr => if (use_old) try cg.airUnwrapErrUnionErrPtr(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - const eu_ty = self.typeOf(ty_op.operand).childType(zcu); + const eu_ty = cg.typeOf(ty_op.operand).childType(zcu); const eu_pl_ty = eu_ty.errorUnionPayload(zcu); const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(eu_err_off, self); - var err = try ops[0].load(eu_ty.errorUnionSet(zcu), self); - try ops[0].die(self); - try err.moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(eu_err_off, cg); + var err = try ops[0].load(eu_ty.errorUnionSet(zcu), cg); + try ops[0].die(cg); + try err.moveTo(inst, cg); }, - .errunion_payload_ptr_set => if (use_old) try self.airErrUnionPayloadPtrSet(inst) else { + .errunion_payload_ptr_set => if (use_old) try cg.airErrUnionPayloadPtrSet(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - const eu_ty = self.typeOf(ty_op.operand).childType(zcu); + const eu_ty = cg.typeOf(ty_op.operand).childType(zcu); const eu_err_ty = eu_ty.errorUnionSet(zcu); const eu_pl_ty = eu_ty.errorUnionPayload(zcu); const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu)); - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(eu_err_off, self); - var no_err = try self.tempFromValue(eu_err_ty, .{ .immediate = 0 }); - 
try ops[0].store(&no_err, self); - try no_err.die(self); - try ops[0].toOffset(eu_pl_off - eu_err_off, self); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(eu_err_off, cg); + var no_err = try cg.tempFromValue(eu_err_ty, .{ .immediate = 0 }); + try ops[0].store(&no_err, cg); + try no_err.die(cg); + try ops[0].toOffset(eu_pl_off - eu_err_off, cg); + try ops[0].moveTo(inst, cg); }, - .struct_field_ptr => if (use_old) try self.airStructFieldPtr(inst) else { + .struct_field_ptr => if (use_old) try cg.airStructFieldPtr(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.StructField, ty_pl.payload).data; - var ops = try self.tempsFromOperands(inst, .{extra.struct_operand}); - try ops[0].toOffset(self.fieldOffset( - self.typeOf(extra.struct_operand), - self.typeOfIndex(inst), + const extra = cg.air.extraData(Air.StructField, ty_pl.payload).data; + var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); + try ops[0].toOffset(cg.fieldOffset( + cg.typeOf(extra.struct_operand), + cg.typeOfIndex(inst), extra.field_index, - ), self); - try ops[0].moveTo(inst, self); + ), cg); + try ops[0].moveTo(inst, cg); }, - .struct_field_ptr_index_0 => if (use_old) try self.airStructFieldPtrIndex(inst, 0) else { + .struct_field_ptr_index_0 => if (use_old) try cg.airStructFieldPtrIndex(inst, 0) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(self.fieldOffset( - self.typeOf(ty_op.operand), - self.typeOfIndex(inst), + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(cg.fieldOffset( + cg.typeOf(ty_op.operand), + cg.typeOfIndex(inst), 0, - ), self); - try ops[0].moveTo(inst, self); + ), cg); + try ops[0].moveTo(inst, cg); }, - .struct_field_ptr_index_1 => if (use_old) try self.airStructFieldPtrIndex(inst, 1) else { + .struct_field_ptr_index_1 => if (use_old) try cg.airStructFieldPtrIndex(inst, 1) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(self.fieldOffset( - self.typeOf(ty_op.operand), - self.typeOfIndex(inst), + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(cg.fieldOffset( + cg.typeOf(ty_op.operand), + cg.typeOfIndex(inst), 1, - ), self); - try ops[0].moveTo(inst, self); + ), cg); + try ops[0].moveTo(inst, cg); }, - .struct_field_ptr_index_2 => if (use_old) try self.airStructFieldPtrIndex(inst, 2) else { + .struct_field_ptr_index_2 => if (use_old) try cg.airStructFieldPtrIndex(inst, 2) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(self.fieldOffset( - self.typeOf(ty_op.operand), - self.typeOfIndex(inst), + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(cg.fieldOffset( + cg.typeOf(ty_op.operand), + cg.typeOfIndex(inst), 2, - ), self); - try ops[0].moveTo(inst, self); + ), cg); + try ops[0].moveTo(inst, cg); }, - .struct_field_ptr_index_3 => if (use_old) try self.airStructFieldPtrIndex(inst, 3) else { + .struct_field_ptr_index_3 => if (use_old) try cg.airStructFieldPtrIndex(inst, 3) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(self.fieldOffset( - self.typeOf(ty_op.operand), - self.typeOfIndex(inst), + 
var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(cg.fieldOffset( + cg.typeOf(ty_op.operand), + cg.typeOfIndex(inst), 3, - ), self); - try ops[0].moveTo(inst, self); + ), cg); + try ops[0].moveTo(inst, cg); }, - .slice => if (use_old) try self.airSlice(inst) else { + .slice => if (use_old) try cg.airSlice(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; - const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); - try ops[0].toPair(&ops[1], self); - try ops[0].moveTo(inst, self); + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + try ops[0].toPair(&ops[1], cg); + try ops[0].moveTo(inst, cg); }, - .slice_len => if (use_old) try self.airSliceLen(inst) else { + .slice_len => if (use_old) try cg.airSliceLen(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toLimb(1, self); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toLimb(1, cg); + try ops[0].moveTo(inst, cg); }, - .slice_ptr => if (use_old) try self.airSlicePtr(inst) else { + .slice_ptr => if (use_old) try cg.airSlicePtr(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toLimb(0, self); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toLimb(0, cg); + try ops[0].moveTo(inst, cg); }, - .ptr_slice_len_ptr => if (use_old) try self.airPtrSliceLenPtr(inst) else { + .ptr_slice_len_ptr => if (use_old) try cg.airPtrSliceLenPtr(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(8, self); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(8, cg); + try ops[0].moveTo(inst, cg); }, - .ptr_slice_ptr_ptr => if (use_old) try self.airPtrSlicePtrPtr(inst) else { + .ptr_slice_ptr_ptr => if (use_old) try cg.airPtrSlicePtrPtr(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(0, self); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].toOffset(0, cg); + try ops[0].moveTo(inst, cg); }, .slice_elem_ptr, .ptr_elem_ptr => |tag| if (use_old) switch (tag) { else => unreachable, - .slice_elem_ptr => try self.airSliceElemPtr(inst), - .ptr_elem_ptr => try self.airPtrElemPtr(inst), + .slice_elem_ptr => try cg.airSliceElemPtr(inst), + .ptr_elem_ptr => try cg.airPtrElemPtr(inst), } else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; - const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - var ops = try self.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); switch (tag) { else => unreachable, - .slice_elem_ptr => try ops[0].toLimb(0, self), + .slice_elem_ptr => try ops[0].toLimb(0, cg), .ptr_elem_ptr => {}, } - const dst_ty = self.typeOfIndex(inst); + const dst_ty = cg.typeOfIndex(inst); if (dst_ty.ptrInfo(zcu).flags.vector_index == .none) zero_offset: { const elem_size = 
dst_ty.childType(zcu).abiSize(zcu); if (elem_size == 0) break :zero_offset; while (true) for (&ops) |*op| { - if (try op.toAnyReg(self)) break; + if (try op.toAnyReg(cg)) break; } else break; - const lhs_reg = ops[0].unwrap(self).temp.tracking(self).short.register.to64(); - const rhs_reg = ops[1].unwrap(self).temp.tracking(self).short.register.to64(); + const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); + const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); if (!std.math.isPowerOfTwo(elem_size)) { - try self.spillEflagsIfOccupied(); - try self.asmRegisterRegisterImmediate( + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegisterImmediate( .{ .i_, .mul }, rhs_reg, rhs_reg, .u(elem_size), ); - try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, }); } else if (elem_size > 8) { - try self.spillEflagsIfOccupied(); - try self.asmRegisterImmediate( + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( .{ ._l, .sh }, rhs_reg, .u(std.math.log2_int(u64, elem_size)), ); - try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, }); - } else try self.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, .mod = .{ .rm = .{ .size = .qword, @@ -2976,57 +3420,57 @@ fn genBody(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }); } - try ops[1].die(self); - try ops[0].moveTo(inst, self); + try ops[1].die(cg); + try ops[0].moveTo(inst, cg); }, - .array_to_slice => if (use_old) try self.airArrayToSlice(inst) else { + .array_to_slice => if (use_old) try cg.airArrayToSlice(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - var len = try self.tempFromValue(Type.usize, .{ - .immediate = self.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu), + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + var len = try cg.tempFromValue(Type.usize, .{ + .immediate = cg.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu), }); - try ops[0].toPair(&len, self); - try ops[0].moveTo(inst, self); + try ops[0].toPair(&len, cg); + try ops[0].moveTo(inst, cg); }, - .error_set_has_value => return self.fail("TODO implement error_set_has_value", .{}), - .field_parent_ptr => if (use_old) try self.airFieldParentPtr(inst) else { + .error_set_has_value => return cg.fail("TODO implement error_set_has_value", .{}), + .field_parent_ptr => if (use_old) try cg.airFieldParentPtr(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; - var ops = try self.tempsFromOperands(inst, .{extra.field_ptr}); - try ops[0].toOffset(-self.fieldOffset( - self.typeOfIndex(inst), - self.typeOf(extra.field_ptr), + const extra = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; + var ops = try cg.tempsFromOperands(inst, .{extra.field_ptr}); + try ops[0].toOffset(-cg.fieldOffset( + cg.typeOfIndex(inst), + cg.typeOf(extra.field_ptr), extra.field_index, - ), self); - try ops[0].moveTo(inst, self); + ), cg); + try ops[0].moveTo(inst, cg); }, - .is_named_enum_value => return self.fail("TODO implement is_named_enum_value", .{}), + .is_named_enum_value => 
return cg.fail("TODO implement is_named_enum_value", .{}), .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, .addrspace_cast => { const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try self.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].moveTo(inst, self); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + try ops[0].moveTo(inst, cg); }, - .vector_store_elem => return self.fail("TODO implement vector_store_elem", .{}), + .vector_store_elem => return cg.fail("TODO implement vector_store_elem", .{}), - .c_va_arg => try self.airVaArg(inst), - .c_va_copy => try self.airVaCopy(inst), - .c_va_end => try self.airVaEnd(inst), - .c_va_start => try self.airVaStart(inst), + .c_va_arg => try cg.airVaArg(inst), + .c_va_copy => try cg.airVaCopy(inst), + .c_va_end => try cg.airVaEnd(inst), + .c_va_start => try cg.airVaStart(inst), .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, } - self.resetTemps(); - self.checkInvariantsAfterAirInst(); + cg.resetTemps(); + cg.checkInvariantsAfterAirInst(); } - verbose_tracking_log.debug("{}", .{self.fmtTracking()}); + verbose_tracking_log.debug("{}", .{cg.fmtTracking()}); } fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { @@ -3117,12 +3561,16 @@ fn freeReg(self: *CodeGen, reg: Register) !void { fn freeValue(self: *CodeGen, value: MCValue) !void { switch (value) { .register => |reg| try self.freeReg(reg), - .register_pair => |regs| for (regs) |reg| try self.freeReg(reg), + inline .register_pair, + .register_triple, + .register_quadruple, + => |regs| for (regs) |reg| try self.freeReg(reg), .register_offset, .indirect => |reg_off| try self.freeReg(reg_off.reg), .register_overflow => |reg_ov| { try self.freeReg(reg_ov.reg); self.eflags_inst = null; }, + .register_mask => |reg_mask| try self.freeReg(reg_mask.reg), .eflags => self.eflags_inst = null, else => {}, // TODO process stack allocation death } @@ -3323,11 +3771,11 @@ fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok }, .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { .float => switch (ty.childType(zcu).floatBits(self.target.*)) { - 16, 32, 64, 128 => if (self.hasFeature(.avx)) 32 else 16, + 16, 32, 64, 128 => self.vectorSize(.float), 80 => break :need_mem, else => unreachable, }, - else => if (self.hasFeature(.avx)) 32 else 16, + else => self.vectorSize(.int), }, else => 8, })) { @@ -3374,6 +3822,28 @@ fn regSetForType(self: *CodeGen, ty: Type) RegisterManager.RegisterBitSet { return regSetForRegClass(self.regClassForType(ty)); } +fn vectorSize(cg: *CodeGen, kind: enum { int, float }) u6 { + return if (cg.hasFeature(switch (kind) { + .int => .avx2, + .float => .avx, + })) 32 else if (cg.hasFeature(.sse)) 16 else 8; +} + +fn limbType(cg: *CodeGen, ty: Type) Type { + const pt = cg.pt; + const zcu = pt.zcu; + const vector_size = cg.vectorSize(if (ty.isRuntimeFloat()) .float else .int); + const scalar_ty, const scalar_size = scalar: { + const scalar_ty = ty.scalarType(zcu); + const scalar_size = scalar_ty.abiSize(zcu); + if (scalar_size <= vector_size) break :scalar .{ scalar_ty, scalar_size }; + }; + pt.vectorType(.{ + .len = @divExact(vector_size, scalar_size), + .child = scalar_ty.toIntern(), + }); +} + const State = struct { registers: RegisterManager.TrackedRegisters, reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking, @@ -3639,7 +4109,7 @@ fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void { } } else { assert(src_bits == 64 and 
dst_bits == 32); - if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + if (self.hasFeature(.avx)) if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, @@ -3652,7 +4122,7 @@ fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void { src_mcv.getReg().? else try self.copyToTmpRegister(src_ty, src_mcv)).to128(), - ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( + ) else if (src_mcv.isBase()) try self.asmRegisterMemory( .{ ._ss, .cvtsd2 }, dst_reg, try src_mcv.mem(self, .{ .size = .qword }), @@ -3751,7 +4221,7 @@ fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void { } else { assert(src_bits == 32 and dst_bits == 64); if (self.hasFeature(.avx)) switch (vec_len) { - 1 => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + 1 => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( .{ .v_sd, .cvtss2 }, dst_alias, dst_alias, @@ -3765,7 +4235,7 @@ fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), ), - 2...4 => if (src_mcv.isMemory()) try self.asmRegisterMemory( + 2...4 => if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_pd, .cvtps2 }, dst_alias, try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), @@ -3778,7 +4248,7 @@ fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void { try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), ), else => break :result null, - } else if (src_mcv.isMemory()) try self.asmRegisterMemory( + } else if (src_mcv.isBase()) try self.asmRegisterMemory( switch (vec_len) { 1 => .{ ._sd, .cvtss2 }, 2 => .{ ._pd, .cvtps2 }, @@ -3827,7 +4297,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { if (dst_ty.isVector(zcu)) { const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); const max_abi_size = @max(dst_abi_size, src_abi_size); - if (max_abi_size > @as(u32, if (self.hasFeature(.avx2)) 32 else 16)) break :result null; + if (max_abi_size > self.vectorSize(.int)) break :result null; const has_avx = self.hasFeature(.avx); const dst_elem_abi_size = dst_ty.childType(zcu).abiSize(zcu); @@ -3919,7 +4389,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_reg = dst_mcv.getReg().?; const dst_alias = registerAlias(dst_reg, dst_abi_size); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( mir_tag, dst_alias, try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), @@ -4017,7 +4487,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable; const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable; - const high_mcv: MCValue = if (dst_mcv.isMemory()) + const high_mcv: MCValue = if (dst_mcv.isBase()) dst_mcv.address().offset((src_limbs_len - 1) * 8).deref() else .{ .register = dst_mcv.register_pair[1] }; @@ -4392,7 +4862,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { else => null, }; defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), @@ -4416,7 +4886,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { else => null, }; defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_rhs_mcv.isMemory()) try 
self.asmRegisterMemory( + if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .xor }, tmp_reg, try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), @@ -4518,7 +4988,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { for (tmp_regs, dst_regs) |tmp_reg, dst_reg| try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_reg); - if (mat_rhs_mcv.isMemory()) { + if (mat_rhs_mcv.isBase()) { try self.asmRegisterMemory( .{ ._, .add }, tmp_regs[0], @@ -4793,7 +5263,7 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { else => null, }; defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), @@ -4817,7 +5287,7 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { else => null, }; defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .xor }, tmp_reg, try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), @@ -5348,7 +5818,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { }; defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .{ .size = .qword }), @@ -5357,7 +5827,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { .rax, mat_lhs_mcv.register_pair[0], ); - if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_regs[0], try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), @@ -5370,14 +5840,14 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { try self.asmSetccRegister(.nz, tmp_regs[1].to8()); try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax); try self.asmSetccRegister(.o, tmp_regs[2].to8()); - if (mat_rhs_mcv.isMemory()) + if (mat_rhs_mcv.isBase()) try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword })) else try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]); try self.asmSetccRegister(.c, tmp_regs[3].to8()); try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[2].to8(), tmp_regs[3].to8()); - if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_regs[0], try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), @@ -5394,7 +5864,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { tmp_regs[3].to8(), ); try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); - if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( .{ .i_, .mul }, tmp_regs[0], try mat_rhs_mcv.mem(self, .{ .size = .qword }), @@ -6512,7 +6982,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { const index_ty = self.typeOf(bin_op.rhs); const index_mcv = try self.resolveInst(bin_op.rhs); - const index_lock: ?RegisterLock = switch (index_mcv) { + const index_lock = switch (index_mcv) { .register => |reg| 
self.register_manager.lockRegAssumeUnused(reg), else => null, }; @@ -6520,48 +6990,102 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { try self.spillEflagsIfOccupied(); if (array_ty.isVector(zcu) and elem_ty.bitSize(zcu) == 1) { - const index_reg = switch (index_mcv) { - .register => |reg| reg, - else => try self.copyToTmpRegister(index_ty, index_mcv), + const array_mat_mcv: MCValue = switch (array_mcv) { + else => array_mcv, + .register_mask => .{ .register = try self.copyToTmpRegister(array_ty, array_mcv) }, }; - switch (array_mcv) { + const array_mat_lock = switch (array_mat_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (array_mat_lock) |lock| self.register_manager.unlockReg(lock); + + switch (array_mat_mcv) { .register => |array_reg| switch (array_reg.class()) { - .general_purpose => try self.asmRegisterRegister( - .{ ._, .bt }, - array_reg.to64(), - index_reg.to64(), - ), + .general_purpose => switch (index_mcv) { + .immediate => |index_imm| try self.asmRegisterImmediate( + .{ ._, .bt }, + array_reg.to64(), + .u(index_imm), + ), + else => try self.asmRegisterRegister( + .{ ._, .bt }, + array_reg.to64(), + switch (index_mcv) { + .register => |index_reg| index_reg, + else => try self.copyToTmpRegister(index_ty, index_mcv), + }.to64(), + ), + }, .sse => { const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu)); - try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mcv, .{}); - try self.asmMemoryRegister( - .{ ._, .bt }, - .{ - .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ .size = .qword } }, - }, - index_reg.to64(), - ); + try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mat_mcv, .{}); + switch (index_mcv) { + .immediate => |index_imm| try self.asmMemoryImmediate( + .{ ._, .bt }, + .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .qword } }, + }, + .u(index_imm), + ), + else => try self.asmMemoryRegister( + .{ ._, .bt }, + .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .qword } }, + }, + switch (index_mcv) { + .register => |index_reg| index_reg, + else => try self.copyToTmpRegister(index_ty, index_mcv), + }.to64(), + ), + } }, else => unreachable, }, - .load_frame => try self.asmMemoryRegister( - .{ ._, .bt }, - try array_mcv.mem(self, .{ .size = .qword }), - index_reg.to64(), - ), - .memory, .load_symbol, .load_direct, .load_got, .load_tlv => try self.asmMemoryRegister( - .{ ._, .bt }, - .{ - .base = .{ - .reg = try self.copyToTmpRegister(Type.usize, array_mcv.address()), + .load_frame => switch (index_mcv) { + .immediate => |index_imm| try self.asmMemoryImmediate( + .{ ._, .bt }, + try array_mat_mcv.mem(self, .{ .size = .qword }), + .u(index_imm), + ), + else => try self.asmMemoryRegister( + .{ ._, .bt }, + try array_mat_mcv.mem(self, .{ .size = .qword }), + switch (index_mcv) { + .register => |index_reg| index_reg, + else => try self.copyToTmpRegister(index_ty, index_mcv), + }.to64(), + ), + }, + .memory, .load_symbol, .load_direct, .load_got, .load_tlv => switch (index_mcv) { + .immediate => |index_imm| try self.asmMemoryImmediate( + .{ ._, .bt }, + .{ + .base = .{ + .reg = try self.copyToTmpRegister(Type.usize, array_mat_mcv.address()), + }, + .mod = .{ .rm = .{ .size = .qword } }, }, - .mod = .{ .rm = .{ .size = .qword } }, - }, - index_reg.to64(), - ), + .u(index_imm), + ), + else => try self.asmMemoryRegister( + .{ ._, .bt }, + .{ + .base = .{ + .reg = try self.copyToTmpRegister(Type.usize, array_mat_mcv.address()), 
+ }, + .mod = .{ .rm = .{ .size = .qword } }, + }, + switch (index_mcv) { + .register => |index_reg| index_reg, + else => try self.copyToTmpRegister(index_ty, index_mcv), + }.to64(), + ), + }, else => return self.fail("TODO airArrayElemVal for {s} of {}", .{ - @tagName(array_mcv), array_ty.fmt(pt), + @tagName(array_mat_mcv), array_ty.fmt(pt), }), } @@ -6856,6 +7380,15 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { const src_bits: u31 = @intCast(src_ty.bitSize(zcu)); const has_lzcnt = self.hasFeature(.lzcnt); if (src_bits > @as(u32, if (has_lzcnt) 128 else 64)) { + const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) { + .load_frame => |src_frame_addr| src_frame_addr, + else => { + const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu)); + try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{}); + break :src_frame_addr .{ .index = src_frame_addr }; + }, + }; + const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; const extra_bits = abi_size * 8 - src_bits; @@ -6881,22 +7414,22 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { try self.asmRegister(.{ ._, .dec }, index_reg.to32()); } try self.asmMemoryImmediate(.{ ._, .cmp }, .{ - .base = .{ .frame = src_mcv.load_frame.index }, + .base = .{ .frame = src_frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, .index = index_reg.to64(), .scale = .@"8", - .disp = src_mcv.load_frame.off, + .disp = src_frame_addr.off, } }, }, .u(0)); _ = try self.asmJccReloc(.e, loop); try self.asmRegisterMemory(.{ ._, .bsr }, dst_reg.to64(), .{ - .base = .{ .frame = src_mcv.load_frame.index }, + .base = .{ .frame = src_frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, .index = index_reg.to64(), .scale = .@"8", - .disp = src_mcv.load_frame.off, + .disp = src_frame_addr.off, } }, }); self.performReloc(zero); @@ -6935,7 +7468,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._, .lzcnt }, Type.u64, dst_mcv, - if (mat_src_mcv.isMemory()) + if (mat_src_mcv.isBase()) mat_src_mcv else .{ .register = mat_src_mcv.register_pair[0] }, @@ -6945,7 +7478,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._, .lzcnt }, Type.u64, tmp_mcv, - if (mat_src_mcv.isMemory()) + if (mat_src_mcv.isBase()) mat_src_mcv.address().offset(8).deref() else .{ .register = mat_src_mcv.register_pair[1] }, @@ -7053,6 +7586,15 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { const src_bits: u31 = @intCast(src_ty.bitSize(zcu)); const has_bmi = self.hasFeature(.bmi); if (src_bits > @as(u32, if (has_bmi) 128 else 64)) { + const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) { + .load_frame => |src_frame_addr| src_frame_addr, + else => { + const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu)); + try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{}); + break :src_frame_addr .{ .index = src_frame_addr }; + }, + }; + const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; const extra_bits = abi_size * 8 - src_bits; @@ -7079,22 +7621,22 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len)); const zero = try self.asmJccReloc(.nb, undefined); try self.asmMemoryImmediate(.{ ._, .cmp }, .{ - .base = .{ .frame = src_mcv.load_frame.index }, + .base = .{ .frame = src_frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, .index = index_reg.to64(), .scale = .@"8", - .disp = src_mcv.load_frame.off, + .disp = src_frame_addr.off, } 
}, }, .u(0)); _ = try self.asmJccReloc(.e, loop); try self.asmRegisterMemory(.{ ._, .bsf }, dst_reg.to64(), .{ - .base = .{ .frame = src_mcv.load_frame.index }, + .base = .{ .frame = src_frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, .index = index_reg.to64(), .scale = .@"8", - .disp = src_mcv.load_frame.off, + .disp = src_frame_addr.off, } }, }); self.performReloc(zero); @@ -7131,11 +7673,11 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isMemory()) + const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isBase()) mat_src_mcv else .{ .register = mat_src_mcv.register_pair[0] }; - const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isMemory()) + const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isBase()) mat_src_mcv.address().offset(8).deref() else .{ .register = mat_src_mcv.register_pair[1] }; @@ -7224,13 +7766,13 @@ fn airPopCount(self: *CodeGen, inst: Air.Inst.Index) !void { const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs); defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); - try self.genPopCount(tmp_regs[0], Type.usize, if (mat_src_mcv.isMemory()) + try self.genPopCount(tmp_regs[0], Type.usize, if (mat_src_mcv.isBase()) mat_src_mcv else .{ .register = mat_src_mcv.register_pair[0] }, false); const src_info = src_ty.intInfo(zcu); const hi_ty = try pt.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1); - try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isMemory()) + try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isBase()) mat_src_mcv.address().offset(8).deref() else .{ .register = mat_src_mcv.register_pair[1] }, false); @@ -7388,7 +7930,7 @@ fn genByteSwap( defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); for (dst_regs, 0..) 
|dst_reg, limb_index| { - if (src_mcv.isMemory()) { + if (src_mcv.isBase()) { try self.asmRegisterMemory( .{ ._, if (has_movbe) .movbe else .mov }, dst_reg.to64(), @@ -7706,7 +8248,7 @@ fn floatSign(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Ty .abs => try vec_ty.maxInt(pt, vec_ty), else => unreachable, }); - const sign_mem: Memory = if (sign_mcv.isMemory()) + const sign_mem: Memory = if (sign_mcv.isBase()) try sign_mcv.mem(self, .{ .size = .fromSize(abi_size) }) else .{ @@ -7888,7 +8430,7 @@ fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: const abi_size: u32 = @intCast(ty.abiSize(pt.zcu)); const dst_alias = registerAlias(dst_reg, abi_size); switch (mir_tag[0]) { - .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( mir_tag, dst_alias, dst_alias, @@ -7904,7 +8446,7 @@ fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: try self.copyToTmpRegister(ty, src_mcv), abi_size), .u(@as(u5, @bitCast(mode))), ), - else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + else => if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( mir_tag, dst_alias, try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), @@ -8105,7 +8647,7 @@ fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void { else try self.register_manager.allocReg(inst, self.regSetForType(ty)); const dst_alias = registerAlias(dst_reg, abi_size); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( mir_tag, dst_alias, try src_mcv.mem(self, .{ .size = self.memSize(ty) }), @@ -8212,7 +8754,7 @@ fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void { }, 2...8 => { const wide_reg = registerAlias(dst_reg, abi_size * 2); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, wide_reg, try src_mcv.mem(self, .{ .size = .fromSize( @@ -8257,7 +8799,7 @@ fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void { else => unreachable, }) orelse return self.fail("TODO implement airSqrt for {}", .{ty.fmt(pt)}); switch (mir_tag[0]) { - .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( mir_tag, dst_reg, dst_reg, @@ -8271,7 +8813,7 @@ fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(ty, src_mcv), abi_size), ), - else => if (src_mcv.isMemory()) try self.asmRegisterMemory( + else => if (src_mcv.isBase()) try self.asmRegisterMemory( mir_tag, dst_reg, try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), @@ -8339,7 +8881,7 @@ fn reuseOperandAdvanced( return false; switch (mcv) { - .register, .register_pair, .register_overflow => for (mcv.getRegs()) |reg| { + .register, .register_pair, .register_overflow, .register_mask => for (mcv.getRegs()) |reg| { // If it's in the registers table, need to associate the register(s) with the // new instruction. 
if (maybe_tracked_inst) |tracked_inst| { @@ -8486,7 +9028,10 @@ fn load(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerE .undef, .eflags, .register_pair, + .register_triple, + .register_quadruple, .register_overflow, + .register_mask, .elementwise_regs_then_frame, .reserved_frame, => unreachable, // not a valid pointer @@ -8694,7 +9239,10 @@ fn store( .undef, .eflags, .register_pair, + .register_triple, + .register_quadruple, .register_overflow, + .register_mask, .elementwise_regs_then_frame, .reserved_frame, => unreachable, // not a valid pointer @@ -8986,7 +9534,7 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genCopy(field_ty, dst_mcv, off_mcv, .{}); break :dst dst_mcv; }; - if (field_abi_size * 8 > field_bit_size and dst_mcv.isMemory()) { + if (field_abi_size * 8 > field_bit_size and dst_mcv.isBase()) { const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); @@ -9194,6 +9742,7 @@ fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: .register_offset, .eflags, .register_overflow, + .register_mask, .lea_symbol, .lea_direct, .lea_got, @@ -9204,7 +9753,7 @@ fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: .air_ref, => unreachable, // unmodifiable destination .register => |dst_reg| try self.asmRegister(mir_tag, registerAlias(dst_reg, abi_size)), - .register_pair => unreachable, // unimplemented + .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented .memory, .load_symbol, .load_got, .load_direct, .load_tlv => { const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); @@ -9974,27 +10523,27 @@ fn genMulDivBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - if (mat_lhs_mcv.isMemory()) + if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory(.{ ._, .mov }, .rax, try mat_lhs_mcv.mem(self, .{ .size = .qword })) else try self.asmRegisterRegister(.{ ._, .mov }, .rax, mat_lhs_mcv.register_pair[0]); - if (mat_rhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_rhs_mcv.register_pair[1]); try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax); - if (mat_rhs_mcv.isMemory()) + if (mat_rhs_mcv.isBase()) try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword })) else try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]); try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg); - if (mat_lhs_mcv.isMemory()) try self.asmRegisterMemory( + if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, tmp_reg, try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), ) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, mat_lhs_mcv.register_pair[1]); - if (mat_rhs_mcv.isMemory()) + if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, try mat_rhs_mcv.mem(self, .{ .size = .qword })) else try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, mat_rhs_mcv.register_pair[0]); @@ -10414,7 +10963,7 @@ fn genBinOp( const tmp_lock = 
self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_w, .insr }, dst_reg, dst_reg, @@ -10439,7 +10988,7 @@ fn genBinOp( 64 => .{ .v_sd, .add }, else => unreachable, }; - if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( mir_tag, dst_reg, dst_reg, @@ -10459,7 +11008,7 @@ fn genBinOp( 64 => .{ ._sd, .add }, else => unreachable, }; - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( mir_tag, dst_reg, try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }), @@ -10514,7 +11063,7 @@ fn genBinOp( }; if (sse_op and ((lhs_ty.scalarType(zcu).isRuntimeFloat() and lhs_ty.scalarType(zcu).floatBits(self.target.*) == 80) or - lhs_ty.abiSize(zcu) > @as(u6, if (self.hasFeature(.avx)) 32 else 16))) + lhs_ty.abiSize(zcu) > self.vectorSize(.float))) return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(air_tag), lhs_ty.fmt(pt) }); const maybe_mask_reg = switch (air_tag) { @@ -10731,7 +11280,7 @@ fn genBinOp( }; try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]); - if (src_mcv.isMemory()) { + if (src_mcv.isBase()) { try self.asmRegisterMemory( .{ ._, .cmp }, dst_regs[0], @@ -10828,8 +11377,11 @@ fn genBinOp( .immediate, .eflags, .register_pair, + .register_triple, + .register_quadruple, .register_offset, .register_overflow, + .register_mask, .load_symbol, .lea_symbol, .load_direct, @@ -10909,7 +11461,7 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_w, .insr }, dst_reg, dst_reg, @@ -11355,7 +11907,7 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_w, .insr }, dst_reg, dst_reg, @@ -11402,7 +11954,7 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( .{ .vp_d, .insr }, dst_reg, try src_mcv.mem(self, .{ .size = .dword }), @@ -11454,7 +12006,7 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, tmp_reg, try src_mcv.mem(self, .{ .size = .qword }), @@ -11497,7 +12049,7 @@ fn genBinOp( defer self.register_manager.unlockReg(tmp_lock); try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, tmp_reg, try src_mcv.mem(self, .{ .size = .xword }), @@ -11659,7 +12211,7 @@ fn genBinOp( else => if (self.hasFeature(.avx)) { const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); - if (src_mcv.isMemory()) try 
self.asmRegisterRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( mir_tag, dst_reg, lhs_reg, @@ -11678,7 +12230,7 @@ fn genBinOp( ); } else { assert(copied_to_dst); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( mir_tag, dst_reg, try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { @@ -11705,7 +12257,7 @@ fn genBinOp( if (self.hasFeature(.avx)) { const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); - if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( mir_tag, dst_reg, lhs_reg, @@ -11726,7 +12278,7 @@ fn genBinOp( ); } else { assert(copied_to_dst); - if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( mir_tag, dst_reg, try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { @@ -11937,7 +12489,7 @@ fn genBinOp( => { const unsigned_ty = try lhs_ty.toUnsigned(pt); const not_mcv = try self.genTypedValue(try unsigned_ty.maxInt(pt, unsigned_ty)); - const not_mem: Memory = if (not_mcv.isMemory()) + const not_mem: Memory = if (not_mcv.isBase()) try not_mcv.mem(self, .{ .size = .fromSize(abi_size) }) else .{ .base = .{ @@ -12017,6 +12569,7 @@ fn genBinOpMir( .immediate, .eflags, .register_overflow, + .register_mask, .lea_direct, .lea_got, .lea_tlv, @@ -12026,9 +12579,9 @@ fn genBinOpMir( .reserved_frame, .air_ref, => unreachable, // unmodifiable destination - .register, .register_pair, .register_offset => { + .register, .register_pair, .register_triple, .register_quadruple, .register_offset => { switch (dst_mcv) { - .register, .register_pair => {}, + .register, .register_pair, .register_triple, .register_quadruple => {}, .register_offset => |ro| assert(ro.off == 0), else => unreachable, } @@ -12057,10 +12610,15 @@ fn genBinOpMir( .dead, .undef, .register_overflow, + .register_mask, .elementwise_regs_then_frame, .reserved_frame, => unreachable, - .register, .register_pair => try self.asmRegisterRegister( + .register, + .register_pair, + .register_triple, + .register_quadruple, + => try self.asmRegisterRegister( mir_limb_tag, dst_alias, registerAlias(src_mcv.getRegs()[dst_reg_i], limb_abi_size), @@ -12216,6 +12774,7 @@ fn genBinOpMir( .dead, .undef, .register_overflow, + .register_mask, .elementwise_regs_then_frame, .reserved_frame, .air_ref, @@ -12224,6 +12783,8 @@ fn genBinOpMir( .eflags, .register, .register_pair, + .register_triple, + .register_quadruple, .register_offset, .indirect, .lea_direct, @@ -12311,6 +12872,7 @@ fn genBinOpMir( .dead, .undef, .register_overflow, + .register_mask, .elementwise_regs_then_frame, .reserved_frame, .air_ref, @@ -12364,6 +12926,8 @@ fn genBinOpMir( }, .register, .register_pair, + .register_triple, + .register_quadruple, .register_offset, .eflags, .memory, @@ -12382,7 +12946,7 @@ fn genBinOpMir( const src_limb_mcv: MCValue = if (src_info) |info| .{ .indirect = .{ .reg = info.addr_reg, .off = off }, } else switch (resolved_src_mcv) { - .register, .register_pair => .{ + .register, .register_pair, .register_triple, .register_quadruple => .{ .register = resolved_src_mcv.getRegs()[limb_i], }, .eflags, @@ -12438,6 +13002,7 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv .eflags, .register_offset, .register_overflow, + .register_mask, .lea_symbol, .lea_direct, .lea_got, @@ -12462,7 +13027,10 @@ fn genIntMulComplexOpMir(self: *CodeGen, 
dst_ty: Type, dst_mcv: MCValue, src_mcv .dead, .undef, .register_pair, + .register_triple, + .register_quadruple, .register_overflow, + .register_mask, .elementwise_regs_then_frame, .reserved_frame, .air_ref, @@ -12539,7 +13107,7 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv ), } }, - .register_pair => unreachable, // unimplemented + .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented .memory, .indirect, .load_symbol, .load_direct, .load_got, .load_tlv, .load_frame => { const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); const tmp_mcv = MCValue{ .register = tmp_reg }; @@ -12892,7 +13460,7 @@ fn genCall(self: *CodeGen, info: union(enum) { const index_lock = self.register_manager.lockRegAssumeUnused(index_reg); defer self.register_manager.unlockReg(index_lock); - const src_mem: Memory = if (src_arg.isMemory()) try src_arg.mem(self, .{ .size = .dword }) else .{ + const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{ .base = .{ .reg = try self.copyToTmpRegister( Type.usize, switch (src_arg) { @@ -12984,7 +13552,7 @@ fn genCall(self: *CodeGen, info: union(enum) { .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, }, .{}), .elementwise_regs_then_frame => |regs_frame_addr| { - const src_mem: Memory = if (src_arg.isMemory()) try src_arg.mem(self, .{ .size = .dword }) else .{ + const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{ .base = .{ .reg = try self.copyToTmpRegister( Type.usize, switch (src_arg) { @@ -13100,6 +13668,8 @@ fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { .none => {}, .register, .register_pair, + .register_triple, + .register_quadruple, => try self.genCopy(ret_ty, self.ret_mcv.short, .{ .air_ref = un_op }, .{ .safety = safety }), .indirect => |reg_off| { try self.register_manager.getReg(reg_off.reg, null); @@ -13226,7 +13796,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v const temp_lhs_lock = self.register_manager.lockRegAssumeUnused(temp_lhs_reg); defer self.register_manager.unlockReg(temp_lhs_lock); - if (lhs_mcv.isMemory()) try self.asmRegisterMemory( + if (lhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, temp_lhs_reg.to8(), try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }), @@ -13240,7 +13810,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v } const payload_compare = payload_compare: { - if (rhs_mcv.isMemory()) { + if (rhs_mcv.isBase()) { const rhs_mem = try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }); try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8()); @@ -13291,13 +13861,13 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v } else .may_flip; const flipped = switch (may_flip) { - .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isMemory(), + .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isBase(), .must_flip => true, .must_not_flip => false, }; const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv; const dst_mcv = if (unmat_dst_mcv.isRegister() or - (abi_size <= 8 and unmat_dst_mcv.isMemory())) unmat_dst_mcv else dst: { + (abi_size <= 8 and unmat_dst_mcv.isBase())) unmat_dst_mcv else dst: { const dst_mcv = try self.allocTempRegOrMem(ty, true); try self.genCopy(ty, dst_mcv, unmat_dst_mcv, .{}); break :dst dst_mcv; @@ -13335,6 +13905,7 @@ fn airCmp(self: *CodeGen, 
inst: Air.Inst.Index, op: std.math.CompareOperator) !v .register, .register_offset, .register_overflow, + .register_mask, .indirect, .lea_direct, .lea_got, @@ -13345,7 +13916,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v .reserved_frame, .air_ref, => unreachable, - .register_pair, .load_frame => null, + .register_pair, .register_triple, .register_quadruple, .load_frame => null, .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: { switch (resolved_dst_mcv) { .memory => |addr| if (std.math.cast( @@ -13396,6 +13967,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v .register, .register_offset, .register_overflow, + .register_mask, .indirect, .lea_symbol, .lea_direct, @@ -13406,7 +13978,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v .reserved_frame, .air_ref, => unreachable, - .register_pair, .load_frame => null, + .register_pair, .register_triple, .register_quadruple, .load_frame => null, .memory, .load_symbol, .load_got, .load_direct, .load_tlv => src: { switch (resolved_src_mcv) { .memory => |addr| if (std.math.cast( @@ -13457,7 +14029,10 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v try self.genSetReg(tmp_reg, Type.usize, if (dst_info) |info| .{ .indirect = .{ .reg = info.addr_reg, .off = off }, } else switch (resolved_dst_mcv) { - .register_pair => |dst_regs| .{ .register = dst_regs[limb_i] }, + inline .register_pair, + .register_triple, + .register_quadruple, + => |dst_regs| .{ .register = dst_regs[limb_i] }, .memory => |dst_addr| .{ .memory = @bitCast(@as(i64, @bitCast(dst_addr)) + off), }, @@ -13479,9 +14054,10 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v if (src_info) |info| .{ .indirect = .{ .reg = info.addr_reg, .off = off }, } else switch (resolved_src_mcv) { - .register_pair => |src_regs| .{ - .register = src_regs[limb_i], - }, + inline .register_pair, + .register_triple, + .register_quadruple, + => |src_regs| .{ .register = src_regs[limb_i] }, .memory => |src_addr| .{ .memory = @bitCast(@as(i64, @bitCast(src_addr)) + off), }, @@ -13539,7 +14115,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg); defer self.register_manager.unlockReg(tmp2_lock); - if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_w, .insr }, tmp1_reg, dst_reg.to128(), @@ -13840,8 +14416,11 @@ fn isNull(self: *CodeGen, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) .undef, .immediate, .eflags, + .register_triple, + .register_quadruple, .register_offset, .register_overflow, + .register_mask, .lea_direct, .lea_got, .lea_tlv, @@ -15481,6 +16060,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C .immediate, .eflags, .register_overflow, + .register_mask, .lea_direct, .lea_got, .lea_tlv, @@ -15510,7 +16090,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C .off = -dst_reg_off.off, } }, }, opts), - .register_pair => |dst_regs| { + inline .register_pair, .register_triple, .register_quadruple => |dst_regs| { const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = switch (src_mcv) { .register_pair, .memory, .indirect, .load_frame => null, .load_symbol, .load_direct, .load_got, .load_tlv => src: { @@ -15535,9 +16115,12 @@ fn 
genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock); var part_disp: i32 = 0; - for (dst_regs, try self.splitType(ty), 0..) |dst_reg, dst_ty, part_i| { + for (dst_regs, try self.splitType(dst_regs.len, ty), 0..) |dst_reg, dst_ty, part_i| { try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) { - .register_pair => |src_regs| .{ .register = src_regs[part_i] }, + inline .register_pair, + .register_triple, + .register_quadruple, + => |src_regs| .{ .register = src_regs[part_i] }, .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(), .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{ .reg = src_info.?.addr_reg, @@ -15733,7 +16316,10 @@ fn genSetReg( }, .ip => unreachable, }, - .register_pair => |src_regs| try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts), + inline .register_pair, + .register_triple, + .register_quadruple, + => |src_regs| try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts), .register_offset, .indirect, .load_frame, @@ -15770,6 +16356,47 @@ fn genSetReg( }, else => unreachable, }), + .register_mask => |src_reg_mask| { + assert(src_reg_mask.reg.class() == .sse); + const has_avx = self.hasFeature(.avx); + const bits_reg = switch (dst_reg.class()) { + .general_purpose => dst_reg, + else => try self.register_manager.allocReg(null, abi.RegisterClass.gp), + }; + const bits_lock = self.register_manager.lockReg(bits_reg); + defer if (bits_lock) |lock| self.register_manager.unlockReg(lock); + + const pack_reg = switch (src_reg_mask.scalar) { + else => src_reg_mask.reg, + .word => try self.register_manager.allocReg(null, abi.RegisterClass.sse), + }; + const pack_lock = self.register_manager.lockReg(pack_reg); + defer if (pack_lock) |lock| self.register_manager.unlockReg(lock); + + var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.scalar.bitSize(), 8)); + switch (src_reg_mask.scalar) { + else => {}, + .word => { + const src_alias = registerAlias(src_reg_mask.reg, mask_size); + const pack_alias = registerAlias(pack_reg, mask_size); + if (has_avx) { + try self.asmRegisterRegisterRegister(.{ .vp_b, .ackssw }, pack_alias, src_alias, src_alias); + } else { + try self.asmRegisterRegister(.{ ._, .movdqa }, pack_alias, src_alias); + try self.asmRegisterRegister(.{ .p_b, .ackssw }, pack_alias, pack_alias); + } + mask_size = std.math.divCeil(u32, mask_size, 2) catch unreachable; + }, + } + try self.asmRegisterRegister(.{ switch (src_reg_mask.scalar) { + .byte, .word => if (has_avx) .vp_b else .p_b, + .dword => if (has_avx) .v_ps else ._ps, + .qword => if (has_avx) .v_pd else ._pd, + else => unreachable, + }, .movmsk }, bits_reg.to32(), registerAlias(pack_reg, mask_size)); + if (src_reg_mask.inverted) try self.asmRegister(.{ ._, .not }, registerAlias(bits_reg, abi_size)); + try self.genSetReg(dst_reg, ty, .{ .register = bits_reg }, .{}); + }, .memory, .load_symbol, .load_direct, .load_got, .load_tlv => { switch (src_mcv) { .memory => |addr| if (std.math.cast(i32, @as(i64, @bitCast(addr)))) |small_addr| @@ -15998,9 +16625,9 @@ fn genSetMem( src_alias, ); }, - .register_pair => |src_regs| { + inline .register_pair, .register_triple, .register_quadruple => |src_regs| { var part_disp: i32 = disp; - for (try self.splitType(ty), src_regs) |src_ty, src_reg| { + for (try self.splitType(src_regs.len, ty), src_regs) |src_ty, src_reg| { try self.genSetMem(base, part_disp, src_ty, .{ .register = src_reg }, opts); 
part_disp += @intCast(src_ty.abiSize(zcu)); } @@ -16065,6 +16692,13 @@ fn genSetMem( try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts); }, + .register_mask => { + const src_reg = try self.copyToTmpRegister(ty, src_mcv); + const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); + defer self.register_manager.unlockReg(src_lock); + + try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts); + }, .memory, .indirect, .load_direct, @@ -16283,7 +16917,7 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) { .lt => dst_ty, - .eq => if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty, + .eq => if (!dst_mcv.isBase() or src_mcv.isBase()) dst_ty else src_ty, .gt => src_ty, }, dst_mcv, src_mcv, .{}); break :dst dst_mcv; @@ -16729,7 +17363,7 @@ fn atomicOp( }); try self.genSetReg(sse_reg, val_ty, .{ .register = .rax }, .{}); switch (mir_tag[0]) { - .v_ss, .v_sd => if (val_mcv.isMemory()) try self.asmRegisterRegisterMemory( + .v_ss, .v_sd => if (val_mcv.isBase()) try self.asmRegisterRegisterMemory( mir_tag, sse_reg.to128(), sse_reg.to128(), @@ -16743,7 +17377,7 @@ fn atomicOp( else try self.copyToTmpRegister(val_ty, val_mcv)).to128(), ), - ._ss, ._sd => if (val_mcv.isMemory()) try self.asmRegisterMemory( + ._ss, ._sd => if (val_mcv.isBase()) try self.asmRegisterMemory( mir_tag, sse_reg.to128(), try val_mcv.mem(self, .{ .size = self.memSize(val_ty) }), @@ -17443,7 +18077,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(dst_lock); const src_mcv = try self.resolveInst(ty_op.operand); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( mir_tag, registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))), try src_mcv.mem(self, .{ .size = self.memSize(scalar_ty) }), @@ -17519,7 +18153,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { const src_mcv = try self.resolveInst(ty_op.operand); if (self.hasFeature(.avx)) { const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_ss, .broadcast }, dst_reg.to128(), try src_mcv.mem(self, .{ .size = .dword }), @@ -17556,7 +18190,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { 5...8 => if (self.hasFeature(.avx)) { const src_mcv = try self.resolveInst(ty_op.operand); const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_ss, .broadcast }, dst_reg.to256(), try src_mcv.mem(self, .{ .size = .dword }), @@ -17602,7 +18236,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { const src_mcv = try self.resolveInst(ty_op.operand); const dst_reg = try self.register_manager.allocReg(inst, dst_rc); if (self.hasFeature(.sse3)) { - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, dst_reg.to128(), try src_mcv.mem(self, .{ .size = .qword }), @@ -17627,7 +18261,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { 3...4 => if (self.hasFeature(.avx)) { const src_mcv = try self.resolveInst(ty_op.operand); const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - if 
(src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_sd, .broadcast }, dst_reg.to256(), try src_mcv.mem(self, .{ .size = .qword }), @@ -17670,7 +18304,7 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { 2 => if (self.hasFeature(.avx)) { const src_mcv = try self.resolveInst(ty_op.operand); const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_f128, .broadcast }, dst_reg.to256(), try src_mcv.mem(self, .{ .size = .xword }), @@ -17779,7 +18413,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { else => unreachable, }, .broadcast }, mask_alias, - if (pred_mcv.isMemory()) try pred_mcv.mem(self, .{ .size = .byte }) else .{ + if (pred_mcv.isBase()) try pred_mcv.mem(self, .{ .size = .byte }) else .{ .base = .{ .reg = (try self.copyToTmpRegister( Type.usize, pred_mcv.address(), @@ -17973,7 +18607,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetReg(dst_reg, ty, rhs_mcv, .{}); break :rhs dst_alias; }; - if (lhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryRegister( + if (lhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister( mir_tag, dst_alias, rhs_alias, @@ -17989,7 +18623,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { try self.copyToTmpRegister(ty, lhs_mcv), abi_size), mask_alias, ); - } else if (has_blend) if (lhs_mcv.isMemory()) try self.asmRegisterMemoryRegister( + } else if (has_blend) if (lhs_mcv.isBase()) try self.asmRegisterMemoryRegister( mir_tag, dst_alias, try lhs_mcv.mem(self, .{ .size = self.memSize(ty) }), @@ -18014,7 +18648,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { }, }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(pt)}); try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias); - if (rhs_mcv.isMemory()) try self.asmRegisterMemory( + if (rhs_mcv.isBase()) try self.asmRegisterMemory( .{ mir_fixes, .andn }, mask_alias, try rhs_mcv.mem(self, .{ .size = .fromSize(abi_size) }), @@ -18093,10 +18727,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: { if (elem_abi_size > 8) break :unpck; - if (dst_abi_size > @as(u32, if (if (elem_abi_size >= 4) - has_avx - else - self.hasFeature(.avx2)) 32 else 16)) break :unpck; + if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :unpck; var sources: [2]?u1 = @splat(null); for (mask_elems, 0..) |maybe_mask_elem, elem_index| { @@ -18154,7 +18785,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { }, else => unreachable, } }; - if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemory( + if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemory( mir_tag, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), @@ -18167,7 +18798,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { rhs_mcv.getReg().? 
else try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), - ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemory( + ) else if (rhs_mcv.isBase()) try self.asmRegisterMemory( mir_tag, dst_alias, try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), @@ -18184,7 +18815,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { pshufd: { if (elem_abi_size != 4) break :pshufd; - if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :pshufd; + if (max_abi_size > self.vectorSize(.float)) break :pshufd; var control: u8 = 0b00_00_00_00; var sources: [1]?u1 = @splat(null); @@ -18216,7 +18847,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { try self.register_manager.allocReg(inst, abi.RegisterClass.sse); const dst_alias = registerAlias(dst_reg, max_abi_size); - if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( .{ if (has_avx) .vp_d else .p_d, .shuf }, dst_alias, try src_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), @@ -18235,7 +18866,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { shufps: { if (elem_abi_size != 4) break :shufps; - if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufps; + if (max_abi_size > self.vectorSize(.float)) break :shufps; var control: u8 = 0b00_00_00_00; var sources: [2]?u1 = @splat(null); @@ -18272,7 +18903,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_reg = dst_mcv.getReg().?; const dst_alias = registerAlias(dst_reg, max_abi_size); - if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .v_ps, .shuf }, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), @@ -18287,7 +18918,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), .u(control), - ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( .{ ._ps, .shuf }, dst_alias, try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), @@ -18306,7 +18937,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { shufpd: { if (elem_abi_size != 8) break :shufpd; - if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufpd; + if (max_abi_size > self.vectorSize(.float)) break :shufpd; var control: u4 = 0b0_0_0_0; var sources: [2]?u1 = @splat(null); @@ -18339,7 +18970,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_reg = dst_mcv.getReg().?; const dst_alias = registerAlias(dst_reg, max_abi_size); - if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .v_pd, .shuf }, dst_alias, registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), @@ -18354,7 +18985,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), .u(control), - ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( .{ ._pd, .shuf }, dst_alias, try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }), @@ -18373,7 +19004,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { blend: { if (elem_abi_size < 2) break :blend; - if 
(dst_abi_size > @as(u32, if (has_avx) 32 else 16)) break :blend; + if (dst_abi_size > self.vectorSize(.float)) break :blend; if (!self.hasFeature(.sse4_1)) break :blend; var control: u8 = 0b0_0_0_0_0_0_0_0; @@ -18409,7 +19040,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { const rhs_mcv = try self.resolveInst(extra.b); const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse); - if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_d, .blend }, registerAlias(dst_reg, dst_abi_size), registerAlias(lhs_reg, dst_abi_size), @@ -18461,7 +19092,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); const dst_reg = dst_mcv.getReg().?; - if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_w, .blend }, registerAlias(dst_reg, dst_abi_size), registerAlias(if (lhs_mcv.isRegister()) @@ -18482,7 +19113,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), .u(expanded_control), - ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( .{ .p_w, .blend }, registerAlias(dst_reg, dst_abi_size), try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), @@ -18518,7 +19149,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); const dst_reg = dst_mcv.getReg().?; - if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( switch (elem_abi_size) { 4 => .{ .v_ps, .blend }, 8, 16 => .{ .v_pd, .blend }, @@ -18547,7 +19178,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), .u(expanded_control), - ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate( switch (elem_abi_size) { 4 => .{ ._ps, .blend }, 8, 16 => .{ ._pd, .blend }, @@ -18573,10 +19204,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { } blendv: { - if (dst_abi_size > @as(u32, if (if (elem_abi_size >= 4) - has_avx - else - self.hasFeature(.avx2)) 32 else 16)) break :blendv; + if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :blendv; const select_mask_elem_ty = try pt.intType(.unsigned, elem_abi_size * 8); const select_mask_ty = try pt.vectorType(.{ @@ -18637,7 +19265,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_reg = dst_mcv.getReg().?; const dst_alias = registerAlias(dst_reg, dst_abi_size); - if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryRegister( + if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister( mir_tag, dst_alias, if (lhs_mcv.isRegister()) @@ -18658,7 +19286,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), select_mask_alias, - ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryRegister( + ) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryRegister( mir_tag, dst_alias, try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), 
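The airShuffle/airSelect hunks above repeatedly swap the open-coded `@as(u32, if (has_avx) 32 else 16)` width checks for a single `self.vectorSize(...)` query. That helper is not shown in this excerpt, so the following standalone sketch only illustrates the mapping implied by the replaced conditions (.float gated on AVX, .int gated on AVX2); the real helper's name, signature, and body in this commit may differ.

const std = @import("std");

// Illustrative only: the vector-width query assumed by the hunks above.
// 32-byte float vectors are available with AVX, 32-byte integer vectors
// require AVX2; otherwise the backend stays at 16-byte (xmm) width.
const VectorKind = enum { int, float };

fn vectorSize(has_avx: bool, has_avx2: bool, kind: VectorKind) u32 {
    return switch (kind) {
        .int => if (has_avx2) 32 else 16, // integer SIMD needs AVX2 for ymm
        .float => if (has_avx) 32 else 16, // float SIMD needs only AVX for ymm
    };
}

test vectorSize {
    try std.testing.expectEqual(@as(u32, 32), vectorSize(true, false, .float));
    try std.testing.expectEqual(@as(u32, 16), vectorSize(true, false, .int));
    try std.testing.expectEqual(@as(u32, 32), vectorSize(true, true, .int));
}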
@@ -18701,7 +19329,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { else .p_; try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias); - if (lhs_mcv.isMemory()) try self.asmRegisterMemory( + if (lhs_mcv.isBase()) try self.asmRegisterMemory( .{ mir_fixes, .andn }, mask_alias, try lhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }), @@ -18851,22 +19479,61 @@ fn airReduce(self: *CodeGen, inst: Air.Inst.Index) !void { if (operand_ty.isVector(zcu) and operand_ty.childType(zcu).toIntern() == .bool_type) { try self.spillEflagsIfOccupied(); - const operand_mcv = try self.resolveInst(reduce.operand); - const mask_len = (std.math.cast(u6, operand_ty.vectorLen(zcu)) orelse - return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)})); - const mask = (@as(u64, 1) << mask_len) - 1; const abi_size: u32 = @intCast(operand_ty.abiSize(zcu)); + const operand_mcv = try self.resolveInst(reduce.operand); + const mask_len = operand_ty.vectorLen(zcu); + const mask_len_minus_one = (std.math.cast(u6, mask_len - 1) orelse { + const acc_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const acc_lock = self.register_manager.lockRegAssumeUnused(acc_reg); + defer self.register_manager.unlockReg(acc_lock); + var limb_offset: i31 = 0; + while (limb_offset < abi_size) : (limb_offset += 8) { + try self.asmRegisterMemory( + .{ ._, if (limb_offset == 0) .mov else switch (reduce.operation) { + .Or => .@"or", + .And => .@"and", + else => return self.fail("TODO implement airReduce for {}", .{operand_ty.fmt(pt)}), + } }, + acc_reg.to64(), + try operand_mcv.mem(self, .{ + .size = .qword, + .disp = limb_offset, + }), + ); + } + switch (reduce.operation) { + .Or => { + try self.asmRegisterRegister(.{ ._, .@"test" }, acc_reg.to64(), acc_reg.to64()); + break :result .{ .eflags = .nz }; + }, + .And => { + try self.asmRegisterImmediate(.{ ._, .cmp }, acc_reg.to64(), .s(-1)); + break :result .{ .eflags = .z }; + }, + else => unreachable, + } + }); + const mask = @as(u64, std.math.maxInt(u64)) >> ~mask_len_minus_one; switch (reduce.operation) { .Or => { - if (operand_mcv.isMemory()) try self.asmMemoryImmediate( + if (operand_mcv.isBase()) try self.asmMemoryImmediate( .{ ._, .@"test" }, try operand_mcv.mem(self, .{ .size = .fromSize(abi_size) }), - .u(mask), - ) else { - const operand_reg = registerAlias(if (operand_mcv.isRegister()) - operand_mcv.getReg().? 
+ if (mask_len < abi_size * 8) + .u(mask) else - try self.copyToTmpRegister(operand_ty, operand_mcv), abi_size); + .s(-1), + ) else { + const operand_reg = registerAlias(operand_reg: { + if (operand_mcv.isRegister()) { + const operand_reg = operand_mcv.getReg().?; + if (operand_reg.class() == .general_purpose) break :operand_reg operand_reg; + } + break :operand_reg try self.copyToTmpRegister(operand_ty, operand_mcv); + }, abi_size); + const operand_lock = self.register_manager.lockReg(operand_reg); + defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); + if (mask_len < abi_size * 8) try self.asmRegisterImmediate( .{ ._, .@"test" }, operand_reg, @@ -18880,7 +19547,10 @@ fn airReduce(self: *CodeGen, inst: Air.Inst.Index) !void { break :result .{ .eflags = .nz }; }, .And => { - const tmp_reg = try self.copyToTmpRegister(operand_ty, operand_mcv); + const tmp_reg = registerAlias( + try self.copyToTmpRegister(operand_ty, operand_mcv), + abi_size, + ); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); @@ -19184,7 +19854,7 @@ fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void { if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) { order[op_index] = 1; unused.unset(0); - } else if (unused.isSet(2) and mcv.isMemory()) { + } else if (unused.isSet(2) and mcv.isBase()) { order[op_index] = 3; unused.unset(2); } @@ -19531,7 +20201,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - if (self.hasFeature(.avx)) if (promote_mcv.isMemory()) try self.asmRegisterRegisterMemory( + if (self.hasFeature(.avx)) if (promote_mcv.isBase()) try self.asmRegisterRegisterMemory( .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, @@ -19544,7 +20214,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { promote_mcv.getReg().? else try self.copyToTmpRegister(promote_ty, promote_mcv)).to128(), - ) else if (promote_mcv.isMemory()) try self.asmRegisterMemory( + ) else if (promote_mcv.isBase()) try self.asmRegisterMemory( .{ ._ss, .cvtsd2 }, dst_reg, try promote_mcv.mem(self, .{ .size = .qword }), @@ -19751,7 +20421,7 @@ fn resolveCallingConventionValues( // TODO: is this even possible for C calling convention? 
result.return_value = .init(.none); } else { - var ret_tracking: [2]InstTracking = undefined; + var ret_tracking: [4]InstTracking = undefined; var ret_tracking_i: usize = 0; const classes = switch (resolved_cc) { @@ -19771,14 +20441,17 @@ fn resolveCallingConventionValues( ret_tracking_i += 1; }, .sse, .float, .float_combine, .win_i128 => { - const ret_sse_reg = registerAlias( - abi.getCAbiSseReturnRegs(resolved_cc)[ret_sse_reg_i], - @intCast(ret_ty.abiSize(zcu)), - ); - ret_sse_reg_i += 1; + const ret_sse_regs = abi.getCAbiSseReturnRegs(resolved_cc); + const abi_size: u32 = @intCast(ret_ty.abiSize(zcu)); + const reg_size = @min(abi_size, self.vectorSize(.float)); + var byte_offset: u32 = 0; + while (byte_offset < abi_size) : (byte_offset += reg_size) { + const ret_sse_reg = registerAlias(ret_sse_regs[ret_sse_reg_i], reg_size); + ret_sse_reg_i += 1; - ret_tracking[ret_tracking_i] = .init(.{ .register = ret_sse_reg }); - ret_tracking_i += 1; + ret_tracking[ret_tracking_i] = .init(.{ .register = ret_sse_reg }); + ret_tracking_i += 1; + } }, .sseup => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .sse), .x87 => { @@ -19811,6 +20484,17 @@ fn resolveCallingConventionValues( ret_tracking[0].short.register, ret_tracking[1].short.register, } }), + 3 => .init(.{ .register_triple = .{ + ret_tracking[0].short.register, + ret_tracking[1].short.register, + ret_tracking[2].short.register, + } }), + 4 => .init(.{ .register_quadruple = .{ + ret_tracking[0].short.register, + ret_tracking[1].short.register, + ret_tracking[2].short.register, + ret_tracking[3].short.register, + } }), }; } @@ -19826,7 +20510,7 @@ fn resolveCallingConventionValues( else => unreachable, } - var arg_mcv: [2]MCValue = undefined; + var arg_mcv: [4]MCValue = undefined; var arg_mcv_i: usize = 0; const classes = switch (resolved_cc) { @@ -19834,15 +20518,13 @@ fn resolveCallingConventionValues( .x86_64_win => &.{abi.classifyWindows(ty, zcu)}, else => unreachable, }; - for (classes) |class| switch (class) { + classes: for (classes) |class| switch (class) { .integer => { const param_int_regs = abi.getCAbiIntParamRegs(resolved_cc); if (param_int_reg_i >= param_int_regs.len) break; - const param_int_reg = registerAlias( - abi.getCAbiIntParamRegs(resolved_cc)[param_int_reg_i], - @intCast(@min(ty.abiSize(zcu), 8)), - ); + const param_int_reg = + registerAlias(param_int_regs[param_int_reg_i], @intCast(@min(ty.abiSize(zcu), 8))); param_int_reg_i += 1; arg_mcv[arg_mcv_i] = .{ .register = param_int_reg }; @@ -19850,16 +20532,18 @@ fn resolveCallingConventionValues( }, .sse, .float, .float_combine => { const param_sse_regs = abi.getCAbiSseParamRegs(resolved_cc); - if (param_sse_reg_i >= param_sse_regs.len) break; + const abi_size: u32 = @intCast(ty.abiSize(zcu)); + const reg_size = @min(abi_size, self.vectorSize(.float)); + var byte_offset: u32 = 0; + while (byte_offset < abi_size) : (byte_offset += reg_size) { + if (param_sse_reg_i >= param_sse_regs.len) break :classes; - const param_sse_reg = registerAlias( - abi.getCAbiSseParamRegs(resolved_cc)[param_sse_reg_i], - @intCast(ty.abiSize(zcu)), - ); - param_sse_reg_i += 1; + const param_sse_reg = registerAlias(param_sse_regs[param_sse_reg_i], reg_size); + param_sse_reg_i += 1; - arg_mcv[arg_mcv_i] = .{ .register = param_sse_reg }; - arg_mcv_i += 1; + arg_mcv[arg_mcv_i] = .{ .register = param_sse_reg }; + arg_mcv_i += 1; + } }, .sseup => assert(arg_mcv[arg_mcv_i - 1].register.class() == .sse), .x87, .x87up, .complex_x87, .memory, .win_i128 => switch (resolved_cc) { @@ -19908,7 
+20592,21 @@ fn resolveCallingConventionValues( arg.* = switch (arg_mcv_i) { else => unreachable, 1 => arg_mcv[0], - 2 => .{ .register_pair = .{ arg_mcv[0].register, arg_mcv[1].register } }, + 2 => .{ .register_pair = .{ + arg_mcv[0].register, + arg_mcv[1].register, + } }, + 3 => .{ .register_triple = .{ + arg_mcv[0].register, + arg_mcv[1].register, + arg_mcv[2].register, + } }, + 4 => .{ .register_quadruple = .{ + arg_mcv[0].register, + arg_mcv[1].register, + arg_mcv[2].register, + arg_mcv[3].register, + } }, }; continue; } @@ -20052,32 +20750,40 @@ fn memSize(self: *CodeGen, ty: Type) Memory.Size { }; } -fn splitType(self: *CodeGen, ty: Type) ![2]Type { +fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Type { const pt = self.pt; const zcu = pt.zcu; + var parts: [parts_len]Type = undefined; + if (ty.isVector(zcu)) if (std.math.divExact(u32, ty.vectorLen(zcu), parts_len)) |vec_len| return .{ + try pt.vectorType(.{ .len = vec_len, .child = ty.scalarType(zcu).toIntern() }), + } ** parts_len else |err| switch (err) { + error.DivisionByZero => unreachable, + error.UnexpectedRemainder => {}, + }; const classes = std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .other), .none); - var parts: [2]Type = undefined; - if (classes.len == 2) for (&parts, classes, 0..) |*part, class, part_i| { + if (classes.len == parts_len) for (&parts, classes, 0..) |*part, class, part_i| { part.* = switch (class) { - .integer => switch (part_i) { - 0 => Type.u64, - 1 => part: { - const elem_size = ty.abiAlignment(zcu).minStrict(.@"8").toByteUnits().?; - const elem_ty = try pt.intType(.unsigned, @intCast(elem_size * 8)); - break :part switch (@divExact(ty.abiSize(zcu) - 8, elem_size)) { - 1 => elem_ty, - else => |len| try pt.arrayType(.{ .len = len, .child = elem_ty.toIntern() }), - }; - }, - else => unreachable, + .integer => if (part_i < parts_len - 1) + Type.u64 + else part: { + const elem_size = ty.abiAlignment(zcu).minStrict(.@"8").toByteUnits().?; + const elem_ty = try pt.intType(.unsigned, @intCast(elem_size * 8)); + break :part switch (@divExact(ty.abiSize(zcu) - part_i * 8, elem_size)) { + 1 => elem_ty, + else => |array_len| try pt.arrayType(.{ .len = array_len, .child = elem_ty.toIntern() }), + }; }, .float => Type.f32, .float_combine => try pt.arrayType(.{ .len = 2, .child = .f32_type }), .sse => Type.f64, else => break, }; - } else if (parts[0].abiSize(zcu) + parts[1].abiSize(zcu) == ty.abiSize(zcu)) return parts; - return self.fail("TODO implement splitType for {}", .{ty.fmt(pt)}); + } else { + var part_sizes: u64 = 0; + for (parts) |part| part_sizes += part.abiSize(zcu); + if (part_sizes == ty.abiSize(zcu)) return parts; + }; + return self.fail("TODO implement splitType({d}, {})", .{ parts_len, ty.fmt(pt) }); } /// Truncates the value in the register in place. 
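The rewritten splitType above adds a vector fast path: an over-sized vector is split evenly across the requested number of parts, and only when the length does not divide evenly does it fall back to the SysV class list. A rough standalone sketch of just that divisibility rule (helper name and test values are illustrative, not taken from the patch):

const std = @import("std");

// Illustrative only: the even-split rule splitType applies to vector types
// before consulting abi.classifySystemV. Returns the per-part vector length,
// or null when classification has to decide instead.
fn splitVectorLen(vector_len: u32, parts_len: u32) ?u32 {
    return std.math.divExact(u32, vector_len, parts_len) catch |err| switch (err) {
        error.DivisionByZero => unreachable, // parts_len is 2..4 for multi-register values
        error.UnexpectedRemainder => null, // no even split; classify instead
    };
}

test splitVectorLen {
    // e.g. @Vector(8, f32) returned in two registers -> 2 x @Vector(4, f32)
    try std.testing.expectEqual(@as(?u32, 4), splitVectorLen(8, 2));
    // 6 elements cannot be split into 4 equal parts
    try std.testing.expectEqual(@as(?u32, null), splitVectorLen(6, 4));
}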
@@ -20297,6 +21003,7 @@ const Temp = struct { .immediate, .eflags, .register_offset, + .register_mask, .memory, .load_symbol, .lea_symbol, @@ -20314,6 +21021,8 @@ const Temp = struct { => false, .register, .register_pair, + .register_triple, + .register_quadruple, .register_overflow, => true, .load_frame => |frame_addr| !frame_addr.index.isNamed(), @@ -20330,7 +21039,7 @@ const Temp = struct { cg.temp_type[@intFromEnum(new_temp_index)] = Type.usize; cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); switch (temp.tracking(cg).short) { - else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), .register => |reg| { const new_reg = try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); @@ -20434,7 +21143,7 @@ const Temp = struct { const new_temp_index = cg.next_temp_index; cg.temp_type[@intFromEnum(new_temp_index)] = Type.usize; switch (temp.tracking(cg).short) { - else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), .immediate => |imm| { assert(limb_index == 0); new_temp_index.tracking(cg).* = .init(.{ .immediate = imm }); @@ -20635,6 +21344,17 @@ const Temp = struct { first_temp.* = result_temp; } + fn asMask(temp: Temp, kind: MaskKind, inverted: bool, scalar: Memory.Size, cg: *CodeGen) void { + assert(scalar != .none); + const mcv = &temp.unwrap(cg).temp.tracking(cg).short; + mcv.* = .{ .register_mask = .{ + .reg = mcv.register, + .kind = kind, + .inverted = inverted, + .scalar = scalar, + } }; + } + fn toLea(temp: *Temp, cg: *CodeGen) !bool { switch (temp.tracking(cg).short) { .none, @@ -20643,7 +21363,10 @@ const Temp = struct { .undef, .eflags, .register_pair, + .register_triple, + .register_quadruple, .register_overflow, + .register_mask, .elementwise_regs_then_frame, .reserved_frame, .air_ref, @@ -20677,10 +21400,7 @@ const Temp = struct { fn toBase(temp: *Temp, cg: *CodeGen) !bool { const temp_tracking = temp.tracking(cg); - switch (temp_tracking.short) { - else => {}, - .indirect, .load_frame => return false, - } + if (temp_tracking.short.isBase()) return false; const new_temp_index = cg.next_temp_index; cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg); const new_reg = @@ -20697,7 +21417,7 @@ const Temp = struct { const val_abi_size: u32 = @intCast(val_ty.abiSize(cg.pt.zcu)); const val = try cg.tempAlloc(val_ty); switch (val.tracking(cg).short) { - else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), .register => |val_reg| { while (try ptr.toLea(cg)) {} switch (val_reg.class()) { @@ -20706,7 +21426,7 @@ const Temp = struct { registerAlias(val_reg, val_abi_size), try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }), ), - else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), } }, .load_frame => |val_frame_addr| { @@ -20724,7 +21444,7 @@ const Temp = struct { const val_ty = val.typeOf(cg); const val_abi_size: u32 = @intCast(val_ty.abiSize(cg.pt.zcu)); val: switch (val.tracking(cg).short) { - else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), .immediate => |imm| if (std.math.cast(i32, imm)) |s| { while (try ptr.toLea(cg)) {} try cg.asmMemoryImmediate( @@ 
-20742,7 +21462,7 @@ const Temp = struct { try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }), registerAlias(val_reg, val_abi_size), ), - else => |tag| std.debug.panic("{s}: {any}\n", .{ @src().fn_name, tag }), + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), } }, } @@ -20876,6 +21596,7 @@ fn reuseTemp( .register_pair, .register_offset, .register_overflow, + .register_mask, .indirect, => for (tracking.short.getRegs()) |tracked_reg| { if (RegisterManager.indexOfRegIntoTracked(tracked_reg)) |tracked_index| { @@ -20913,6 +21634,26 @@ fn tempAllocReg(cg: *CodeGen, ty: Type, rc: RegisterManager.RegisterBitSet) !Tem return .{ .index = temp_index.toIndex() }; } +fn tempAllocRegPair(cg: *CodeGen, ty: Type, rc: RegisterManager.RegisterBitSet) !Temp { + const temp_index = cg.next_temp_index; + temp_index.tracking(cg).* = .init( + .{ .register_pair = try cg.register_manager.allocRegs(2, temp_index.toIndex(), rc) }, + ); + cg.temp_type[@intFromEnum(temp_index)] = ty; + cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); + return .{ .index = temp_index.toIndex() }; +} + +fn tempAllocMem(cg: *CodeGen, ty: Type) !Temp { + const temp_index = cg.next_temp_index; + temp_index.tracking(cg).* = .init( + try cg.allocRegOrMemAdvanced(ty, temp_index.toIndex(), false), + ); + cg.temp_type[@intFromEnum(temp_index)] = ty; + cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); + return .{ .index = temp_index.toIndex() }; +} + fn tempFromValue(cg: *CodeGen, ty: Type, value: MCValue) !Temp { const temp_index = cg.next_temp_index; temp_index.tracking(cg).* = .init(value); @@ -20993,30 +21734,23 @@ const Operand = union(enum) { inst: Mir.Inst.Index, }; -const SelectLoop = struct { - element_reloc: Mir.Inst.Index, - element_offset: union(enum) { - unused, - known: u31, - temp: Temp, - }, - element_size: ?u13, - limb_reloc: Mir.Inst.Index, - limb_offset: union(enum) { - unused, - known: u31, - temp: Temp, - }, - limb_size: ?u8, - remaining_size: ?u64, -}; - const Pattern = struct { ops: []const Op, commute: struct { u8, u8 } = .{ 0, 0 }, const Set = struct { required_features: []const std.Target.x86.Feature = &.{}, + scalar: union(enum) { + any, + bool, + float: Memory.Size, + any_int: Memory.Size, + signed_int: Memory.Size, + unsigned_int: Memory.Size, + any_float_or_int: Memory.Size, + } = .any, + clobbers: struct { eflags: bool = false } = .{}, + invert_result: bool = false, loop: enum { /// only execute the instruction once once, @@ -21050,6 +21784,18 @@ const Pattern = struct { xmm, /// any 256-bit sse register ymm, + /// a 64-bit mmx register mask + mm_mask, + /// a 128-bit sse register mask + xmm_mask, + /// a 256-bit sse register mask + ymm_mask, + /// a 64-bit mmx register sign mask + mm_sign_mask, + /// a 128-bit sse register sign mask + xmm_sign_mask, + /// a 256-bit sse register sign mask + ymm_sign_mask, /// any memory mem, /// a limb stored in a gpr @@ -21062,41 +21808,82 @@ const Pattern = struct { ymm_limb, /// a limb stored in memory mem_limb, + /// a limb stored in a 64-bit mmx register mask + mm_mask_limb, + /// a limb stored in a 128-bit sse register masuk + xmm_mask_limb, + /// a limb stored in a 256-bit sse register masuk + ymm_mask_limb, /// specific immediate imm: i8, /// any immediate signed extended from 32 bits simm32, + /// a temp general purpose register containing all ones + umax_gpr, + /// a temp 64-bit mmx register containing all ones + umax_mm, + /// a temp 128-bit sse register containing all ones + umax_xmm, + /// a 
temp 256-bit sse register containing all ones + umax_ymm, fn matches(op: Op, is_mut: bool, temp: Temp, cg: *CodeGen) bool { - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return switch (op) { + switch (op) { .implicit, .explicit => unreachable, + else => {}, + // temp is undefined + .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => return true, + } + const temp_ty = temp.typeOf(cg); + const abi_size = temp_ty.abiSize(cg.pt.zcu); + return switch (op) { + .implicit, .explicit, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, .gpr => abi_size <= 8 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .general_purpose, .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0, - else => cg.regClassForType(temp.typeOf(cg)) == .general_purpose, + else => cg.regClassForType(temp_ty) == .general_purpose, }, - .mm => abi_size <= 8 and switch (temp.tracking(cg).short) { + .mm, .mm_mask, .mm_sign_mask => abi_size <= 8 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .mmx, .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0, - else => cg.regClassForType(temp.typeOf(cg)) == .mmx, + else => cg.regClassForType(temp_ty) == .mmx, }, - .xmm => abi_size > 8 and abi_size <= 16 and switch (temp.tracking(cg).short) { + .xmm, .xmm_mask, .xmm_sign_mask => abi_size > 8 and abi_size <= 16 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .sse, .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, - else => cg.regClassForType(temp.typeOf(cg)) == .sse, + else => cg.regClassForType(temp_ty) == .sse, }, - .ymm => abi_size > 16 and abi_size <= 32 and switch (temp.tracking(cg).short) { + .ymm, .ymm_mask, .ymm_sign_mask => abi_size > 16 and abi_size <= 32 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .sse, .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, - else => cg.regClassForType(temp.typeOf(cg)) == .sse, + else => cg.regClassForType(temp_ty) == .sse, }, .mem, .mem_limb => (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), - .gpr_limb => abi_size > 8, - .mm_limb => abi_size > 8 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory() and cg.regClassForType(temp.typeOf(cg)) == .mmx, - .xmm_limb => abi_size > 16 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), - .ymm_limb => abi_size > 32 and (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), + .gpr_limb => abi_size > 8 and switch (temp.tracking(cg).short) { + inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| { + if (reg.class() != .general_purpose) break false; + } else true, + else => |mcv| mcv.isMemory(), + }, + .mm_limb, .mm_mask_limb => abi_size > 8 and switch (temp.tracking(cg).short) { + inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| { + if (reg.class() != .mmx) break false; + } else true, + else => |mcv| mcv.isMemory() and cg.regClassForType(temp_ty) == .mmx, + }, + .xmm_limb, .xmm_mask_limb => abi_size > 16 and switch (temp.tracking(cg).short) { + inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| { + if (reg.class() != .sse) break false; + } else true, + else => |mcv| mcv.isMemory(), + }, + .ymm_limb, .ymm_mask_limb => abi_size > 32 and switch (temp.tracking(cg).short) { + inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) 
|reg| { + if (reg.class() != .sse) break false; + } else true, + else => |mcv| mcv.isMemory(), + }, .imm => |specific_imm| if (is_mut) unreachable else switch (temp.tracking(cg).short) { .immediate => |imm| @as(i64, @bitCast(imm)) == specific_imm, else => false, @@ -21109,78 +21896,176 @@ const Pattern = struct { } }; }; -fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_sets: []const Pattern.Set) !void { - var loop: SelectLoop = .{ +const SelectOptions = struct { + invert_result: bool = false, +}; +fn select( + cg: *CodeGen, + dst_temps: []Temp, + dst_tys: []const Type, + src_temps: []Temp, + pattern_sets: []const Pattern.Set, + opts: SelectOptions, +) !void { + var loop: struct { + element_reloc: Mir.Inst.Index, + element_offset: Offset, + element_size: ?u13, + limb_reloc: Mir.Inst.Index, + limb_offset: Offset, + limb_size: ?u8, + mask_limb_temp: ?Temp, + mask_limb_offset: Offset, + mask_limb_offset_lock: ?RegisterLock, + mask_limb_bit_size: ?u7, + mask_store_temp: ?Temp, + mask_store_reg: ?Register, + mask_store_bit_size: ?u7, + remaining_size: ?u64, + + const Offset = union(enum) { + unused, + known: u31, + temp: Temp, + }; + } = .{ .element_reloc = undefined, .element_offset = .unused, .element_size = null, .limb_reloc = undefined, .limb_offset = .unused, .limb_size = null, + .mask_limb_temp = null, + .mask_limb_offset = .unused, + .mask_limb_offset_lock = null, + .mask_limb_bit_size = null, + .mask_store_temp = null, + .mask_store_reg = null, + .mask_store_bit_size = null, .remaining_size = null, }; var extra_temps: [4]?Temp = @splat(null); pattern_sets: for (pattern_sets) |pattern_set| { for (pattern_set.required_features) |required_feature| if (!cg.hasFeature(required_feature)) continue :pattern_sets; + for (src_temps) |src_temp| switch (pattern_set.scalar) { + .any => {}, + .bool => if (src_temp.typeOf(cg).scalarType(cg.pt.zcu).toIntern() != .bool_type) continue :pattern_sets, + .float => |size| { + const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); + if (!scalar_ty.isRuntimeFloat()) continue :pattern_sets; + if (scalar_ty.floatBits(cg.target.*) != size.bitSize()) continue :pattern_sets; + }, + .any_int => |size| { + const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); + if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets; + if (scalar_ty.intInfo(cg.pt.zcu).bits > size.bitSize()) continue :pattern_sets; + }, + .signed_int => |size| { + const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); + if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets; + const scalar_info = scalar_ty.intInfo(cg.pt.zcu); + if (scalar_info.signedness != .signed) continue :pattern_sets; + if (scalar_info.bits > size.bitSize()) continue :pattern_sets; + }, + .unsigned_int => |size| { + const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); + if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets; + const scalar_info = scalar_ty.intInfo(cg.pt.zcu); + if (scalar_info.signedness != .unsigned) continue :pattern_sets; + if (scalar_info.bits > size.bitSize()) continue :pattern_sets; + }, + .any_float_or_int => |size| { + const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); + if (scalar_ty.isRuntimeFloat()) { + if (scalar_ty.floatBits(cg.target.*) != size.bitSize()) continue :pattern_sets; + } else if (scalar_ty.isAbiInt(cg.pt.zcu)) { + if (scalar_ty.intInfo(cg.pt.zcu).bits > size.bitSize()) continue :pattern_sets; + } else continue :pattern_sets; + }, + }; patterns: for (pattern_set.patterns) |pattern| { for (src_temps, 
pattern.ops[dst_temps.len..]) |src_temp, src_op| { const ref_src_op, const is_mut = switch (src_op) { - .implicit, .explicit => |op_index| .{ pattern.ops[op_index], true }, + .implicit, .explicit => |linked_index| .{ pattern.ops[linked_index], true }, else => .{ src_op, false }, }; - if (!ref_src_op.matches(is_mut, src_temp.*, cg)) continue :patterns; + if (!ref_src_op.matches(is_mut, src_temp, cg)) continue :patterns; } - while (true) for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { - if (switch (switch (src_op) { - .implicit, .explicit => |op_index| pattern.ops[op_index], + + while (true) for (src_temps, pattern.ops[dst_temps.len..]) |*src_temp, src_op| { + if (changed: switch (switch (src_op) { + .implicit, .explicit => |linked_index| pattern.ops[linked_index], else => src_op, }) { .implicit, .explicit => unreachable, .gpr => try src_temp.toRegClass(.general_purpose, cg), - .mm => try src_temp.toRegClass(.mmx, cg), - .xmm, .ymm => try src_temp.toRegClass(.sse, cg), - .mem, .imm, .simm32 => false, - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mem_limb => switch (src_temp.tracking(cg).short) { - .register_pair => false, + .mm, .mm_mask, .mm_sign_mask => try src_temp.toRegClass(.mmx, cg), + .xmm, + .ymm, + .xmm_mask, + .ymm_mask, + .xmm_sign_mask, + .ymm_sign_mask, + => try src_temp.toRegClass(.sse, cg), + .mem => try src_temp.toBase(cg), + .imm, .simm32 => false, + .gpr_limb, + .mm_limb, + .xmm_limb, + .ymm_limb, + .mem_limb, + => switch (src_temp.tracking(cg).short) { + .register_pair, .register_triple, .register_quadruple => false, else => try src_temp.toBase(cg), }, + .mm_mask_limb, + .xmm_mask_limb, + .ymm_mask_limb, + => if (!cg.hasFeature(.bmi2) and !cg.register_manager.isKnownRegFree(.rcx)) { + try cg.register_manager.getKnownReg(.rcx, null); + loop.mask_limb_offset_lock = cg.register_manager.lockKnownRegAssumeUnused(.rcx); + break :changed true; + } else false, + .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => false, }) break; } else break; + + var dst_is_linked: std.StaticBitSet(4) = .initEmpty(); var mir_ops_len = dst_temps.len; for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { - const ref_src_op, const extra_temp = op: switch (src_op) { - .implicit => |op_index| { - dst_temps[op_index] = if (src_temp.isMut(cg)) - src_temp.* - else - try cg.tempAlloc(src_temp.typeOf(cg)); - break :op .{ pattern.ops[op_index], &extra_temps[op_index] }; - }, - .explicit => |op_index| { - dst_temps[op_index] = if (src_temp.isMut(cg)) - src_temp.* - else - try cg.tempAlloc(src_temp.typeOf(cg)); - defer mir_ops_len += 1; - break :op .{ pattern.ops[op_index], &extra_temps[mir_ops_len] }; - }, - else => { - defer mir_ops_len += 1; - break :op .{ src_op, &extra_temps[mir_ops_len] }; + defer mir_ops_len += @intFromBool(src_op != .implicit); + const linked_src_op, const extra_temp = op: switch (src_op) { + .implicit, .explicit => |linked_index| { + if (src_temp.isMut(cg)) { + dst_temps[linked_index] = src_temp; + dst_is_linked.set(linked_index); + } + break :op .{ pattern.ops[linked_index], &extra_temps[linked_index] }; }, + else => .{ src_op, &extra_temps[mir_ops_len] }, }; - const limb_size: u8, const rc = switch (ref_src_op) { + const limb_size: u8, const rc = switch (linked_src_op) { else => continue, .gpr_limb => .{ 8, abi.RegisterClass.gp }, - .mm_limb => .{ 8, @panic("TODO") }, - .xmm_limb => .{ 16, abi.RegisterClass.sse }, - .ymm_limb => .{ 32, abi.RegisterClass.sse }, + .mm_limb, .mm_mask_limb => .{ 8, @panic("TODO") }, + .xmm_limb, .xmm_mask_limb => .{ 16, 
abi.RegisterClass.sse }, + .ymm_limb, .ymm_mask_limb => .{ 32, abi.RegisterClass.sse }, + .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => { + assert(extra_temp.* == null); + extra_temp.* = try cg.tempAllocReg(Type.usize, switch (linked_src_op) { + else => unreachable, + .umax_gpr => abi.RegisterClass.gp, + .umax_mm => @panic("TODO"), + .umax_xmm, .umax_ymm => abi.RegisterClass.sse, + }); + continue; + }, }; assert(loop.limb_size == null or loop.limb_size == limb_size); loop.limb_size = limb_size; loop.remaining_size = loop.remaining_size orelse src_temp.typeOf(cg).abiSize(cg.pt.zcu); switch (src_temp.tracking(cg).short) { - .register_pair => switch (loop.limb_offset) { + .register_pair, .register_triple, .register_quadruple => switch (loop.limb_offset) { .unused, .temp => loop.limb_offset = .{ .known = 0 }, .known => {}, }, @@ -21189,25 +22074,130 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set .unused => loop.limb_offset = .{ .temp = undefined }, .known, .temp => {}, } + assert(extra_temp.* == null); extra_temp.* = try cg.tempAllocReg(Type.usize, rc); }, } } + for ( + 0.., + dst_temps, + pattern.ops[0..dst_temps.len], + dst_tys, + extra_temps[0..dst_temps.len], + ) |dst_index, *dst_temp, dst_op, dst_ty, *extra_temp| switch (dst_op) { + else => if (!dst_is_linked.isSet(dst_index)) { + dst_temp.* = dst_temp: switch (dst_op) { + .implicit => unreachable, + .explicit => |linked_index| dst_temps[linked_index], + .gpr => try cg.tempAllocReg(dst_ty, abi.RegisterClass.gp), + .mm, .mm_mask, .mm_sign_mask => @panic("TODO"), + .xmm, .xmm_mask, .xmm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse), + .ymm, .ymm_mask, .ymm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse), + .mem => @panic("TODO"), + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => { + if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(Type.usize, switch (dst_op) { + else => unreachable, + .gpr_limb => abi.RegisterClass.gp, + .mm_limb => @panic("TODO"), + .xmm_limb, .ymm_limb => abi.RegisterClass.sse, + }); + break :dst_temp try cg.tempAlloc(dst_ty); + }, + .mem_limb => try cg.tempAlloc(dst_ty), + .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => unreachable, // already checked + .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination + }; + }, + .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { + const scalar_size = @divExact(switch (pattern_set.scalar) { + .any, .bool => unreachable, + .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, + }.bitSize(), 8); + const mask_bit_size = @divExact(loop.remaining_size.?, scalar_size); + const mask_limb_bit_size: u7 = @intCast(@divExact(loop.limb_size.?, scalar_size)); + assert(loop.mask_limb_bit_size == null or loop.mask_limb_bit_size == mask_limb_bit_size); + loop.mask_limb_bit_size = mask_limb_bit_size; + const mask_store_bit_size = mask_store_bit_size: { + // Try to match limb size so that no shifting will be needed. + if (mask_limb_bit_size % 8 == 0) break :mask_store_bit_size mask_limb_bit_size; + // If abi size <= 8 the entire value can be stored at once, + // enabling store forwarding and minimizing store buffer usage. + // Otherwise, we will be performing shifts that need to wrap at + // store size, which for x86 requires 32 or 64, so just pick 64 + // for the same reasons as above. 
+ break :mask_store_bit_size @min(mask_bit_size, 64); + }; + assert(loop.mask_store_bit_size == null or loop.mask_store_bit_size == mask_store_bit_size); + loop.mask_store_bit_size = mask_store_bit_size; + loop.mask_limb_offset = loop.limb_offset; + if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(Type.usize, switch (dst_op) { + else => unreachable, + .mm_mask_limb => @panic("TODO"), + .xmm_mask_limb, .ymm_mask_limb => abi.RegisterClass.sse, + }); + if (loop.mask_limb_temp == null) loop.mask_limb_temp = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + if (mask_limb_bit_size < mask_store_bit_size and loop.mask_store_reg == null) { + loop.mask_store_temp = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + loop.mask_store_reg = loop.mask_store_temp.?.tracking(cg).short.register; + } + dst_temp.* = if (mask_store_bit_size < mask_bit_size) + try cg.tempAllocMem(dst_ty) + else if (loop.mask_store_temp) |mask_store_temp| dst_temp: { + loop.mask_store_temp = null; + break :dst_temp mask_store_temp; + } else try cg.tempAlloc(dst_ty); + }, + }; + switch (loop.mask_limb_offset) { + .unused, .known => {}, + .temp => |*mask_limb_offset| { + if (cg.hasFeature(.bmi2)) { + assert(loop.mask_limb_offset_lock == null); + mask_limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + } else { + if (loop.mask_limb_offset_lock) |lock| cg.register_manager.unlockReg(lock); + loop.mask_limb_offset_lock = null; + mask_limb_offset.* = try cg.tempFromValue(Type.usize, .{ .register = .rcx }); + } + if (loop.mask_store_reg) |mask_store_reg| { + const mask_store_alias = registerAlias( + mask_store_reg, + @min(std.math.divCeil(u7, loop.mask_store_bit_size.?, 8) catch unreachable, 4), + ); + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); + } + }, + } switch (loop.element_offset) { .unused, .known => {}, .temp => |*element_offset| { element_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); const element_offset_reg = element_offset.tracking(cg).short.register; + try cg.spillEflagsIfOccupied(); try cg.asmRegisterRegister(.{ ._, .xor }, element_offset_reg.to32(), element_offset_reg.to32()); loop.element_reloc = @intCast(cg.mir_instructions.len); }, } + switch (loop.limb_offset) { + .unused, .known => {}, + .temp => |*limb_offset| limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp), + } while (true) { + switch (loop.mask_limb_offset) { + .unused, .known => {}, + .temp => |mask_limb_offset| { + const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32(); + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegister(.{ ._, .xor }, mask_limb_offset_reg, mask_limb_offset_reg); + }, + } switch (loop.limb_offset) { .unused, .known => {}, - .temp => |*limb_offset| { - limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); - const limb_offset_reg = limb_offset.tracking(cg).short.register; + .temp => |limb_offset| { + const limb_offset_reg = limb_offset.tracking(cg).short.register.to32(); + try cg.spillEflagsIfOccupied(); try cg.asmRegisterRegister(.{ ._, .xor }, limb_offset_reg.to32(), limb_offset_reg.to32()); loop.limb_reloc = @intCast(cg.mir_instructions.len); }, @@ -21216,56 +22206,67 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set var mir_ops: [4]Operand = @splat(.none); mir_ops_len = dst_temps.len; for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { - const mir_op, const ref_src_op, const extra_temp = 
op: switch (src_op) { - .implicit => |op_index| .{ &mir_ops[op_index], pattern.ops[op_index], &extra_temps[op_index] }, - .explicit => |op_index| { - defer mir_ops_len += 1; - break :op .{ &mir_ops[mir_ops_len], pattern.ops[op_index], &extra_temps[mir_ops_len] }; - }, - else => { - defer mir_ops_len += 1; - break :op .{ &mir_ops[mir_ops_len], src_op, &extra_temps[mir_ops_len] }; - }, + defer mir_ops_len += @intFromBool(src_op != .implicit); + const mir_op, const linked_src_op, const extra_temp = switch (src_op) { + .implicit => |linked_index| .{ &mir_ops[linked_index], pattern.ops[linked_index], extra_temps[linked_index] }, + .explicit => |linked_index| .{ &mir_ops[mir_ops_len], pattern.ops[linked_index], extra_temps[linked_index] }, + else => .{ &mir_ops[mir_ops_len], src_op, extra_temps[mir_ops_len] }, }; - const src_mcv = src_temp.tracking(cg).short; - switch (ref_src_op) { - else => {}, - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (src_mcv) { - .register_pair => {}, - else => try cg.asmRegisterMemory( - switch (ref_src_op) { - else => unreachable, - .gpr_limb => .{ ._, .mov }, - .mm_limb => .{ ._q, .mov }, - .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, + const src_mcv = switch (linked_src_op) { + else => src_temp, + // src_temp is undefined + .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => extra_temp.?, + }.tracking(cg).short; + copy_limb: switch (src_mcv) { + .register_pair, .register_triple, .register_quadruple => {}, + else => try cg.asmRegisterMemory( + switch (linked_src_op) { + else => break :copy_limb, + .gpr_limb => .{ ._, .mov }, + .mm_limb, .mm_mask_limb => .{ ._q, .mov }, + .xmm_limb, + .ymm_limb, + .xmm_mask_limb, + .ymm_mask_limb, + => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, + }, + registerAlias(extra_temp.?.tracking(cg).short.register, loop.limb_size.?), + try src_mcv.mem(cg, switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .disp = limb_offset, }, - registerAlias(extra_temp.*.?.tracking(cg).short.register, loop.limb_size.?), - try src_mcv.mem(cg, switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .disp = limb_offset, - }, - .temp => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .index = limb_offset.tracking(cg).short.register.to64(), - }, - }), - ), - }, + .temp => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .index = limb_offset.tracking(cg).short.register.to64(), + }, + }), + ), } - mir_op.* = switch (ref_src_op) { + mir_op.* = switch (linked_src_op) { .implicit, .explicit => unreachable, .gpr => .{ .reg = registerAlias( src_mcv.register, @intCast(src_temp.typeOf(cg).abiSize(cg.pt.zcu)), ) }, - .mm => .{ .reg = src_mcv.register }, - .xmm => .{ .reg = src_mcv.register.to128() }, - .ymm => .{ .reg = src_mcv.register.to256() }, + .umax_gpr => .{ .reg = src_mcv.register.to64() }, // TODO: use other op size? 
+ .mm, .mm_mask, .mm_sign_mask, .umax_mm => .{ .reg = src_mcv.register }, + .xmm, .xmm_mask, .xmm_sign_mask, .umax_xmm => .{ .reg = src_mcv.register.to128() }, + .ymm, .ymm_mask, .ymm_sign_mask, .umax_ymm => .{ .reg = src_mcv.register.to256() }, .mem => .{ .mem = try src_mcv.mem(cg, .{ .size = cg.memSize(src_temp.typeOf(cg)) }) }, - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (src_mcv) { - .register_pair => |src_regs| switch (loop.limb_offset) { + .gpr_limb, + .mm_limb, + .xmm_limb, + .ymm_limb, + .mm_mask_limb, + .xmm_mask_limb, + .ymm_mask_limb, + => switch (src_mcv) { + inline .register_pair, + .register_triple, + .register_quadruple, + => |src_regs| switch (loop.limb_offset) { .unused => unreachable, .known => |limb_offset| .{ .reg = registerAlias( src_regs[@divExact(limb_offset, loop.limb_size.?)], @@ -21274,12 +22275,12 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set .temp => unreachable, }, else => .{ .reg = registerAlias( - extra_temp.*.?.tracking(cg).short.register, + extra_temp.?.tracking(cg).short.register, loop.limb_size.?, ) }, }, .mem_limb => .{ .mem = switch (src_mcv) { - .register_pair => unreachable, + .register_pair, .register_triple, .register_quadruple => unreachable, else => switch (loop.limb_offset) { .unused => unreachable, .known => |limb_offset| try src_mcv.mem(cg, .{ @@ -21309,76 +22310,53 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set .u(@as(u32, @intCast(src_mcv.immediate))) }, }, }; - } - for ( - pattern.ops[0..dst_temps.len], - dst_temps, - mir_ops[0..dst_temps.len], - extra_temps[0..dst_temps.len], - ) |dst_op, *dst_temp, *mir_op, *extra_temp| { - if (mir_op.* != .none) continue; - const ty = src_temps[0].typeOf(cg); - switch (dst_op) { - .implicit => unreachable, - .explicit => |op_index| { - dst_temp.* = dst_temps[op_index]; - mir_op.* = mir_ops[op_index]; - }, - .gpr => { - dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.gp); - mir_op.* = .{ .reg = registerAlias( - dst_temp.tracking(cg).short.register, - @intCast(ty.abiSize(cg.pt.zcu)), - ) }; - }, - .mm => @panic("TODO"), - .xmm => { - dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.sse); - mir_op.* = .{ .reg = dst_temp.tracking(cg).short.register.to128() }; - }, - .ymm => { - dst_temp.* = try cg.tempAllocReg(ty, abi.RegisterClass.sse); - mir_op.* = .{ .reg = dst_temp.tracking(cg).short.register.to256() }; - }, - .mem => @panic("TODO"), - .gpr_limb => { - dst_temp.* = try cg.tempAlloc(ty); - extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); - mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to64() }; - }, - .mm_limb => { - dst_temp.* = try cg.tempAlloc(ty); - extra_temp.* = try cg.tempAllocReg(Type.usize, @panic("TODO")); - mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register }; - }, - .xmm_limb => { - dst_temp.* = try cg.tempAlloc(ty); - extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.sse); - mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to128() }; - }, - .ymm_limb => { - dst_temp.* = try cg.tempAlloc(ty); - extra_temp.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.sse); - mir_op.* = .{ .reg = extra_temp.*.?.tracking(cg).short.register.to256() }; - }, - .mem_limb => { - dst_temp.* = try cg.tempAlloc(ty); - mir_op.* = .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .disp = limb_offset, - }, - .temp => 
|limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .index = limb_offset.tracking(cg).short.register.to64(), - }, - }) }; - }, - .imm, .simm32 => unreachable, // unmodifiable destination + switch (src_op) { + else => {}, + .explicit => |linked_index| mir_ops[linked_index] = mir_op.*, } } + for ( + mir_ops[0..dst_temps.len], + pattern.ops[0..dst_temps.len], + dst_temps, + dst_tys, + extra_temps[0..dst_temps.len], + ) |*mir_op, dst_op, dst_temp, dst_ty, extra_temp| { + if (mir_op.* != .none) continue; + mir_op.* = switch (dst_op) { + .implicit => unreachable, + .explicit => |linked_index| mir_ops[linked_index], + .gpr => .{ .reg = registerAlias( + dst_temp.tracking(cg).short.register, + @intCast(dst_ty.abiSize(cg.pt.zcu)), + ) }, + .mm, .mm_mask, .mm_sign_mask => @panic("TODO"), + .xmm, .xmm_mask, .xmm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to128() }, + .ymm, .ymm_mask, .ymm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to256() }, + .mem => @panic("TODO"), + .gpr_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to64() }, + .mm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register }, + .xmm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() }, + .ymm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() }, + .mem_limb => .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .disp = limb_offset, + }, + .temp => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .index = limb_offset.tracking(cg).short.register.to64(), + }, + }) }, + .mm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register }, + .xmm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() }, + .ymm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() }, + .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination + }; + } std.mem.swap(Operand, &mir_ops[pattern.commute[0]], &mir_ops[pattern.commute[1]]); + if (pattern_set.clobbers.eflags) try cg.spillEflagsIfOccupied(); cg.asmOps(pattern_set.mir_tag, mir_ops) catch |err| switch (err) { error.InvalidInstruction => { const fixes = @tagName(pattern_set.mir_tag[0]); @@ -21398,42 +22376,223 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set }, else => |e| return e, }; + const invert_result = opts.invert_result != pattern_set.invert_result; for ( extra_temps[0..dst_temps.len], pattern.ops[0..dst_temps.len], + mir_ops[0..dst_temps.len], dst_temps, - ) |maybe_extra_temp, dst_op, dst_temp| if (maybe_extra_temp) |extra_temp| switch (dst_op) { - else => {}, - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => switch (dst_temp.tracking(cg).short) { - .register_pair => |dst_regs| switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| try cg.asmRegisterRegister( - .{ ._, .mov }, - dst_regs[@divExact(limb_offset, loop.limb_size.?)].to64(), - extra_temp.tracking(cg).short.register.to64(), + ) |extra_temp, dst_op, mir_op, dst_temp| switch (dst_op) { + else => if (invert_result) { + try cg.spillEflagsIfOccupied(); + cg.asmOps( + .{ ._, .not }, + .{ mir_op, .none, .none, .none }, + ) catch |err| switch (err) { + error.InvalidInstruction => return cg.fail( + "invalid instruction: 'not {s} none none none'", + .{@tagName(mir_op)}, ), - .temp => unreachable, - }, - else => |dst_mcv| try cg.asmMemoryRegister( - switch (dst_op) { - else => unreachable, - 
.gpr_limb => .{ ._, .mov }, - .mm_limb => .{ ._q, .mov }, - .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, - }, - try dst_mcv.mem(cg, switch (loop.limb_offset) { + else => |e| return e, + }; + }, + .mm_mask, + .xmm_mask, + .ymm_mask, + .mm_sign_mask, + .xmm_sign_mask, + .ymm_sign_mask, + => dst_temp.asMask(switch (dst_op) { + else => unreachable, + .mm_mask, .xmm_mask, .ymm_mask => .all, + .mm_sign_mask, .xmm_sign_mask, .ymm_sign_mask => .sign, + }, invert_result, switch (pattern_set.scalar) { + .any, .bool => unreachable, + .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, + }, cg), + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => if (extra_temp) |limb_temp| + switch (dst_temp.tracking(cg).short) { + inline .register_pair, + .register_triple, + .register_quadruple, + => |dst_regs| switch (loop.limb_offset) { .unused => unreachable, - .known => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .disp = limb_offset, + .known => |limb_offset| try cg.asmRegisterRegister( + .{ ._, .mov }, + dst_regs[@divExact(limb_offset, loop.limb_size.?)].to64(), + limb_temp.tracking(cg).short.register.to64(), + ), + .temp => unreachable, + }, + else => |dst_mcv| try cg.asmMemoryRegister( + switch (dst_op) { + else => unreachable, + .gpr_limb => .{ ._, .mov }, + .mm_limb => .{ ._q, .mov }, + .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, }, - .temp => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .index = limb_offset.tracking(cg).short.register.to64(), + try dst_mcv.mem(cg, switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .disp = limb_offset, + }, + .temp => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .index = limb_offset.tracking(cg).short.register.to64(), + }, + }), + registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?), + ), + }, + .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { + const scalar_size = switch (pattern_set.scalar) { + .any, .bool => unreachable, + .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, + }; + switch (scalar_size) { + else => {}, + .word => if (cg.hasFeature(.avx)) try cg.asmRegisterRegisterRegister( + .{ .vp_b, .ackssw }, + mir_op.reg, + mir_op.reg, + mir_op.reg, + ) else try cg.asmRegisterRegister( + .{ .p_b, .ackssw }, + mir_op.reg, + mir_op.reg, + ), + } + const mask_store_size: u4 = + @intCast(std.math.divCeil(u7, loop.mask_store_bit_size.?, 8) catch unreachable); + const mask_limb_reg = registerAlias( + loop.mask_limb_temp.?.tracking(cg).short.register, + mask_store_size, + ); + try cg.asmRegisterRegister(switch (scalar_size) { + else => unreachable, + .byte, .word => .{ if (cg.hasFeature(.avx)) .vp_b else .p_b, .movmsk }, + .dword => .{ if (cg.hasFeature(.avx)) .v_ps else ._ps, .movmsk }, + .qword => .{ if (cg.hasFeature(.avx)) .v_pd else ._pd, .movmsk }, + }, mask_limb_reg.to32(), mir_op.reg); + if (invert_result) if (loop.mask_store_reg) |_| { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( + .{ ._, .xor }, + registerAlias(mask_limb_reg, @min(mask_store_size, 4)), + .u((@as(u32, 1) << @intCast(loop.mask_limb_bit_size.?)) - 1), + ); + } else try cg.asmRegister(.{ ._, .not }, mask_limb_reg); + if (loop.mask_store_reg) |mask_store_reg| { + const mask_store_alias = registerAlias(mask_store_reg, mask_store_size); + switch (loop.mask_limb_offset) { + .unused => unreachable, + .known => |mask_limb_offset| switch 
(mask_limb_offset & (loop.mask_store_bit_size.? - 1)) { + 0 => try cg.asmRegisterRegister(.{ ._, .mov }, mask_store_alias, mask_limb_reg), + else => |shl_count| { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate(.{ ._l, .sh }, mask_limb_reg, .u(shl_count)); + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg); + }, }, - }), - registerAlias(extra_temp.tracking(cg).short.register, loop.limb_size.?), - ), + .temp => |mask_limb_offset| { + if (cg.hasFeature(.bmi2)) { + const shlx_size = @max(mask_store_size, 4); + const shlx_mask_limb_reg = registerAlias(mask_limb_reg, shlx_size); + try cg.asmRegisterRegisterRegister( + .{ ._lx, .sh }, + shlx_mask_limb_reg, + shlx_mask_limb_reg, + registerAlias(mask_limb_offset.tracking(cg).short.register, shlx_size), + ); + } else { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegister( + .{ ._l, .sh }, + mask_limb_reg, + mask_limb_offset.tracking(cg).short.register.to8(), + ); + } + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg); + }, + } + } + const dst_mcv = dst_temp.tracking(cg).short; + switch (loop.mask_limb_offset) { + .unused => unreachable, + .known => |*mask_limb_offset| { + mask_limb_offset.* += loop.mask_limb_bit_size.?; + if (mask_limb_offset.* & (loop.mask_store_bit_size.? - 1) == 0) { + switch (dst_mcv) { + .register => {}, + else => try cg.asmMemoryRegister( + .{ ._, .mov }, + try dst_mcv.mem(cg, .{ + .size = .fromSize(mask_store_size), + .disp = @divExact(mask_limb_offset.*, 8) - mask_store_size, + }), + registerAlias(loop.mask_store_reg orelse mask_limb_reg, mask_store_size), + ), + } + if (loop.mask_store_reg) |mask_store_reg| { + const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4)); + try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); + } + } + }, + .temp => |mask_limb_offset| { + const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32(); + if (loop.mask_store_reg) |mask_store_reg| { + try cg.asmRegisterMemory(.{ ._, .lea }, mask_limb_offset_reg, .{ + .base = .{ .reg = mask_limb_offset_reg.to64() }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = loop.mask_limb_bit_size.?, + } }, + }); + switch (dst_mcv) { + .register => {}, + else => { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( + .{ ._, .@"test" }, + mask_limb_offset_reg, + .u(loop.mask_store_bit_size.? 
- 1), + ); + const skip_store_reloc = try cg.asmJccReloc(.nz, undefined); + const mask_store_offset_reg = mask_limb_reg.to32(); + try cg.asmRegisterRegister(.{ ._, .mov }, mask_store_offset_reg, mask_limb_offset_reg); + try cg.asmRegisterImmediate(.{ ._r, .sh }, mask_store_offset_reg, .u(3)); + try cg.asmMemoryRegister(.{ ._, .mov }, try dst_mcv.mem(cg, .{ + .size = .fromSize(mask_store_size), + .index = mask_store_offset_reg.to64(), + .disp = -@as(i8, mask_store_size), + }), registerAlias(mask_store_reg, mask_store_size)); + const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4)); + try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); + cg.performReloc(skip_store_reloc); + }, + } + } else { + switch (dst_mcv) { + .register => {}, + else => try cg.asmMemoryRegister(.{ ._, .mov }, try dst_mcv.mem(cg, .{ + .size = .fromSize(mask_store_size), + .index = mask_limb_offset_reg.to64(), + }), mask_limb_reg), + } + try cg.asmRegisterMemory(.{ ._, .lea }, mask_limb_offset_reg, .{ + .base = .{ .reg = mask_limb_offset_reg.to64() }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = mask_store_size, + } }, + }); + } + }, + } }, }; switch (pattern_set.loop) { @@ -21442,7 +22601,7 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set .limbwise_carry => @panic("TODO"), .limbwise_pairs_forward => @panic("TODO"), .limbwise_pairs_reverse => @panic("TODO"), - .elementwise => @panic("TODO"), + .elementwise => {}, } switch (loop.limb_offset) { .unused => break, @@ -21452,6 +22611,11 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set if (loop.remaining_size.? < loop.limb_size.? or (loop.element_size != null and limb_offset.* >= loop.element_size.?)) { + switch (loop.mask_limb_offset) { + .unused => {}, + .known => |*mask_limb_offset| mask_limb_offset.* = 0, + .temp => unreachable, + } limb_offset.* = 0; break; } @@ -21465,6 +22629,7 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set .disp = loop.limb_size.?, } }, }); + try cg.spillEflagsIfOccupied(); try cg.asmRegisterImmediate( .{ ._, .cmp }, limb_offset_reg.to32(), @@ -21476,6 +22641,12 @@ fn select(cg: *CodeGen, dst_temps: []Temp, src_temps: []const *Temp, pattern_set }, } } + if (loop.mask_limb_temp) |mask_limb_temp| try mask_limb_temp.die(cg); + if (loop.mask_store_temp) |mask_store_temp| try mask_store_temp.die(cg); + switch (loop.mask_limb_offset) { + .unused, .known => {}, + .temp => |mask_limb_offset| try mask_limb_offset.die(cg), + } switch (loop.element_offset) { .unused => break :pattern_sets, .known => |*element_offset| { diff --git a/src/arch/x86_64/Disassembler.zig b/src/arch/x86_64/Disassembler.zig index f6eeedba2c..85f4f3edef 100644 --- a/src/arch/x86_64/Disassembler.zig +++ b/src/arch/x86_64/Disassembler.zig @@ -223,7 +223,7 @@ pub fn next(dis: *Disassembler) Error!?Instruction { .op3 = op3, }); }, - .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, // TODO + .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, // TODO } } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 1738a382f6..a172a948d2 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -177,7 +177,7 @@ pub fn format( try writer.print("+{s} ", .{tag}); }, .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}), - .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr => try writer.writeAll("/r "), + .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, 
.mvr, .rmv => try writer.writeAll("/r "), } switch (encoding.data.op_en) { @@ -202,7 +202,7 @@ pub fn format( try writer.print("{s} ", .{tag}); }, .rvmr => try writer.writeAll("/is4 "), - .zo, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {}, + .zo, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr, .rmv => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); @@ -260,10 +260,10 @@ pub const Mnemonic = enum { neg, nop, not, @"or", pause, pop, popcnt, popfq, push, pushfq, - rcl, rcr, ret, rol, ror, - sal, sar, sbb, + rcl, rcr, ret, rol, ror, rorx, + sal, sar, sarx, sbb, scas, scasb, scasd, scasq, scasw, - shl, shld, shr, shrd, sub, syscall, + shl, shld, shlx, shr, shrd, shrx, sub, syscall, seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae, setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns, setnz, seto, setp, setpe, setpo, sets, setz, @@ -444,7 +444,7 @@ pub const OpEn = enum { fd, td, m1, mc, mi, mr, rm, rmi, mri, mrc, - rm0, vmi, rvm, rvmr, rvmi, mvr, + rm0, vmi, rvm, rvmr, rvmi, mvr, rmv, // zig fmt: on }; @@ -808,6 +808,7 @@ pub const Feature = enum { avx, avx2, bmi, + bmi2, f16c, fma, lzcnt, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index d00d5b2e8a..45ad2c13e9 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -29,10 +29,14 @@ pub const Inst = struct { _l, /// ___ Left Double _ld, + /// ___ Left Without Affecting Flags + _lx, /// ___ Right _r, /// ___ Right Double _rd, + /// ___ Right Without Affecting Flags + _rx, /// ___ Above _a, @@ -401,9 +405,11 @@ pub const Inst = struct { ret, /// Rotate left /// Rotate right + /// Rotate right logical without affecting flags ro, /// Arithmetic shift left /// Arithmetic shift right + /// Shift left arithmetic without affecting flags sa, /// Integer subtraction with borrow sbb, @@ -417,6 +423,8 @@ pub const Inst = struct { /// Double precision shift left /// Logical shift right /// Double precision shift right + /// Shift left logical without affecting flags + /// Shift right logical without affecting flags sh, /// Subtract /// Subtract packed integers diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 85ece4f93c..3d710c426a 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -242,8 +242,12 @@ pub fn classifySystemV(ty: Type, zcu: *Zcu, target: std.Target, ctx: Context) [8 .sse, .sseup, .sseup, .sseup, .sseup, .sseup, .sseup, .none, }; - // LLVM always returns vectors byval - if (bits <= 512 or ctx == .ret) return .{ + if (bits <= 512 or (ctx == .ret and bits <= @as(u64, if (std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) + 2048 + else if (std.Target.x86.featureSetHas(target.cpu.features, .avx)) + 1024 + else + 512))) return .{ .sse, .sseup, .sseup, .sseup, .sseup, .sseup, .sseup, .sseup, }; @@ -416,7 +420,7 @@ pub const SysV = struct { pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; pub const c_abi_sse_param_regs = sse_avx_regs[0..8].*; pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; - pub const c_abi_sse_return_regs = sse_avx_regs[0..2].*; + pub const c_abi_sse_return_regs = sse_avx_regs[0..4].*; }; pub const Win64 = struct { @@ -496,7 +500,7 @@ pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention) []const Register } const gp_regs = [_]Register{ - .rax, .rcx, .rdx, .rbx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15, + .rax, .rdx, .rbx, .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, 
.r15, }; const x87_regs = [_]Register{ .st0, .st1, .st2, .st3, .st4, .st5, .st6, .st7, diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 674c81fa62..cf03b9729f 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -403,7 +403,7 @@ pub const Instruction = struct { else => { const mem_op = switch (data.op_en) { .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], - .rm, .rmi, .rm0, .vmi => inst.ops[1], + .rm, .rmi, .rm0, .vmi, .rmv => inst.ops[1], .rvm, .rvmr, .rvmi => inst.ops[2], else => unreachable, }; @@ -412,7 +412,7 @@ pub const Instruction = struct { const rm = switch (data.op_en) { .m, .mi, .m1, .mc, .vmi => enc.modRmExt(), .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(), - .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0].reg.lowEnc(), + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0].reg.lowEnc(), .mvr => inst.ops[2].reg.lowEnc(), else => unreachable, }; @@ -422,7 +422,7 @@ pub const Instruction = struct { const op = switch (data.op_en) { .m, .mi, .m1, .mc, .vmi => .none, .mr, .mri, .mrc => inst.ops[1], - .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0], + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0], .mvr => inst.ops[2], else => unreachable, }; @@ -493,7 +493,7 @@ pub const Instruction = struct { } else null, - .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, + .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => unreachable, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -512,9 +512,9 @@ pub const Instruction = struct { switch (op_en) { .zo, .i, .zi, .fd, .td, .d => {}, .o, .oi => rex.b = inst.ops[0].reg.isExtended(), - .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0 => { + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .rmv => { const r_op = switch (op_en) { - .rm, .rmi, .rm0 => inst.ops[0], + .rm, .rmi, .rm0, .rmv => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], else => .none, }; @@ -546,9 +546,9 @@ pub const Instruction = struct { switch (op_en) { .zo, .i, .zi, .fd, .td, .d => {}, .o, .oi => vex.b = inst.ops[0].reg.isExtended(), - .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => { + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => { const r_op = switch (op_en) { - .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0], + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], .mvr => inst.ops[2], .m, .mi, .m1, .mc, .vmi => .none, @@ -557,7 +557,7 @@ pub const Instruction = struct { vex.r = r_op.isBaseExtended(); const b_x_op = switch (op_en) { - .rm, .rmi, .rm0, .vmi => inst.ops[1], + .rm, .rmi, .rm0, .vmi, .rmv => inst.ops[1], .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], .rvm, .rvmr, .rvmi => inst.ops[2], else => unreachable, @@ -588,6 +588,7 @@ pub const Instruction = struct { else => {}, .vmi => vex.v = inst.ops[0].reg, .rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg, + .rmv => vex.v = inst.ops[2].reg, } try encoder.vex(vex); diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index d4a7dcafe7..a204aa017f 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1287,6 +1287,16 @@ pub const table = [_]Entry{ .{ .sha256rnds2, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x0f, 0x38, 0xcb }, 0, .none, .sha }, // AVX + .{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 }, + .{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 }, + + .{ .sarx, .rmv, &.{ .r32, 
.rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, + .{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, + .{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, + .{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, + .{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, + .{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, + .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, diff --git a/src/register_manager.zig b/src/register_manager.zig index 9450c92d25..4176dd7d83 100644 --- a/src/register_manager.zig +++ b/src/register_manager.zig @@ -112,6 +112,9 @@ pub fn RegisterManager( pub fn indexOfRegIntoTracked(reg: Register) ?TrackedIndex { return indexOfReg(tracked_registers, reg); } + pub inline fn indexOfKnownRegIntoTracked(comptime reg: Register) ?TrackedIndex { + return comptime indexOfRegIntoTracked(reg); + } pub fn regAtTrackedIndex(tracked_index: TrackedIndex) Register { return tracked_registers[tracked_index]; @@ -124,6 +127,9 @@ pub fn RegisterManager( pub fn isRegFree(self: Self, reg: Register) bool { return self.isRegIndexFree(indexOfRegIntoTracked(reg) orelse return true); } + pub fn isKnownRegFree(self: Self, comptime reg: Register) bool { + return self.isRegIndexFree(indexOfKnownRegIntoTracked(reg) orelse return true); + } /// Returns whether this register was allocated in the course /// of this function. @@ -143,6 +149,9 @@ pub fn RegisterManager( pub fn isRegLocked(self: Self, reg: Register) bool { return self.isRegIndexLocked(indexOfRegIntoTracked(reg) orelse return false); } + pub fn isKnownRegLocked(self: Self, comptime reg: Register) bool { + return self.isRegIndexLocked(indexOfKnownRegIntoTracked(reg) orelse return false); + } pub const RegisterLock = struct { tracked_index: TrackedIndex }; @@ -176,6 +185,9 @@ pub fn RegisterManager( pub fn lockRegAssumeUnused(self: *Self, reg: Register) RegisterLock { return self.lockRegIndexAssumeUnused(indexOfRegIntoTracked(reg) orelse unreachable); } + pub fn lockKnownRegAssumeUnused(self: *Self, comptime reg: Register) RegisterLock { + return self.lockRegIndexAssumeUnused(indexOfKnownRegIntoTracked(reg) orelse unreachable); + } /// Like `lockReg` but locks multiple registers. pub fn lockRegs( @@ -366,7 +378,7 @@ pub fn RegisterManager( comptime reg: Register, inst: ?Air.Inst.Index, ) AllocationError!void { - return self.getRegIndex((comptime indexOfRegIntoTracked(reg)) orelse return, inst); + return self.getRegIndex(indexOfKnownRegIntoTracked(reg) orelse return, inst); } /// Allocates the specified register with the specified diff --git a/test/behavior.zig b/test/behavior.zig index e0e07b24cd..8a774606e6 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -110,6 +110,8 @@ test { _ = @import("behavior/widening.zig"); _ = @import("behavior/abs.zig"); + _ = @import("behavior/x86_64.zig"); + if (builtin.cpu.arch == .wasm32) { _ = @import("behavior/wasm.zig"); } diff --git a/test/behavior/x86_64.zig b/test/behavior/x86_64.zig new file mode 100644 index 0000000000..ffb1750ff6 --- /dev/null +++ b/test/behavior/x86_64.zig @@ -0,0 +1,9 @@ +//! CodeGen tests for the x86_64 backend. 
+ +const builtin = @import("builtin"); + +test { + if (builtin.zig_backend != .stage2_x86_64) return error.SkipZigTest; + if (builtin.object_format == .coff) return error.SkipZigTest; + _ = @import("x86_64/math.zig"); +} diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig new file mode 100644 index 0000000000..900e5747a8 --- /dev/null +++ b/test/behavior/x86_64/math.zig @@ -0,0 +1,230 @@ +fn testBinary(comptime op: anytype) !void { + const testType = struct { + fn testType(comptime Type: type, comptime imm_lhs: Type, comptime imm_rhs: Type) !void { + const expected = op(Type, imm_lhs, imm_rhs); + try struct { + fn testOne(actual: @TypeOf(expected)) !void { + if (switch (@typeInfo(@TypeOf(expected))) { + else => actual != expected, + .vector => @reduce(.Or, actual != expected), + }) return error.Unexpected; + } + noinline fn testOps(mem_lhs: Type, mem_rhs: Type) !void { + var reg_lhs = mem_lhs; + var reg_rhs = mem_rhs; + _ = .{ ®_lhs, ®_rhs }; + try testOne(op(Type, reg_lhs, reg_rhs)); + try testOne(op(Type, reg_lhs, mem_rhs)); + try testOne(op(Type, reg_lhs, imm_rhs)); + try testOne(op(Type, mem_lhs, reg_rhs)); + try testOne(op(Type, mem_lhs, mem_rhs)); + try testOne(op(Type, mem_lhs, imm_rhs)); + try testOne(op(Type, imm_lhs, reg_rhs)); + try testOne(op(Type, imm_lhs, mem_rhs)); + } + }.testOps(imm_lhs, imm_rhs); + } + }.testType; + + try testType(u8, 0xbb, 0x43); + try testType(u16, 0xb8bf, 0x626d); + try testType(u32, 0x80d7a2c6, 0xbff6a402); + try testType(u64, 0x71138bc6b4a38898, 0x1bc4043de9438c7b); + try testType(u128, 0xe05fc132ef2cd8affee00a907f0a851f, 0x29f912a72cfc6a7c6973426a9636da9a); + + try testType(@Vector(16, u8), .{ + 0xea, 0x80, 0xbb, 0xe8, 0x74, 0x81, 0xc8, 0x66, 0x7b, 0x41, 0x90, 0xcb, 0x30, 0x70, 0x4b, 0x0f, + }, .{ + 0x61, 0x26, 0xbe, 0x47, 0x00, 0x9c, 0x55, 0xa5, 0x59, 0xf0, 0xb2, 0x20, 0x30, 0xaf, 0x82, 0x3e, + }); + try testType(@Vector(32, u8), .{ + 0xa1, 0x88, 0xc4, 0xf4, 0x77, 0x0b, 0xf5, 0xbb, 0x09, 0x03, 0xbf, 0xf5, 0xcc, 0x7f, 0x6b, 0x2a, + 0x4c, 0x05, 0x37, 0xc9, 0x8a, 0xcb, 0x91, 0x23, 0x09, 0x5f, 0xb8, 0x99, 0x4a, 0x75, 0x26, 0xe4, + }, .{ + 0xff, 0x0f, 0x99, 0x49, 0xa6, 0x25, 0xa7, 0xd4, 0xc9, 0x2f, 0x97, 0x6a, 0x01, 0xd6, 0x6e, 0x41, + 0xa4, 0xb5, 0x3c, 0x03, 0xea, 0x82, 0x9c, 0x5f, 0xac, 0x07, 0x16, 0x15, 0x1c, 0x64, 0x25, 0x2f, + }); + try testType(@Vector(64, u8), .{ + 0xaa, 0x08, 0xeb, 0xb2, 0xd7, 0x89, 0x0f, 0x98, 0xda, 0x9f, 0xa6, 0x4e, 0x3c, 0xce, 0x1b, 0x1b, + 0x9e, 0x5f, 0x2b, 0xd6, 0x59, 0x26, 0x47, 0x05, 0x2a, 0xb7, 0xd1, 0x10, 0xde, 0xd9, 0x84, 0x00, + 0x07, 0xc0, 0xaa, 0x6e, 0xfa, 0x3b, 0x97, 0x85, 0xa8, 0x42, 0xd7, 0xa5, 0x90, 0xe6, 0x10, 0x1a, + 0x47, 0x84, 0xe1, 0x3e, 0xb0, 0x70, 0x26, 0x3f, 0xea, 0x24, 0xb8, 0x5f, 0xe3, 0xe3, 0x4c, 0xed, + }, .{ + 0x3b, 0xc5, 0xe0, 0x3d, 0x4f, 0x2e, 0x1d, 0xa9, 0xf7, 0x7b, 0xc7, 0xc1, 0x48, 0xc6, 0xe5, 0x9e, + 0x4d, 0xa8, 0x21, 0x37, 0xa1, 0x1a, 0x95, 0x69, 0x89, 0x2f, 0x15, 0x07, 0x3d, 0x7b, 0x69, 0x89, + 0xea, 0x87, 0xf0, 0x94, 0x67, 0xf2, 0x3d, 0x04, 0x96, 0x8a, 0xd6, 0x70, 0x7c, 0x16, 0xe7, 0x62, + 0xf0, 0x8d, 0x96, 0x65, 0xd1, 0x4a, 0x35, 0x3e, 0x7a, 0x67, 0xa6, 0x1f, 0x37, 0x66, 0xe3, 0x45, + }); + try testType(@Vector(128, u8), .{ + 0xa1, 0xd0, 0x7b, 0xf9, 0x7b, 0x77, 0x7b, 0x3d, 0x2d, 0x68, 0xc2, 0x7b, 0xb0, 0xb8, 0xd4, 0x7c, + 0x1a, 0x1f, 0xd2, 0x92, 0x3e, 0xcb, 0xc1, 0x6b, 0xb9, 0x4d, 0xf1, 0x67, 0x58, 0x8e, 0x77, 0xa6, + 0xb9, 0xdf, 0x10, 0x6f, 0xbe, 0xe3, 0x33, 0xb6, 0x93, 0x77, 0x80, 0xef, 0x09, 0x9d, 0x61, 0x40, + 0xa2, 0xf4, 0x52, 0x18, 0x9d, 0xe4, 0xb0, 0xaf, 0x0a, 
0xa7, 0x0b, 0x09, 0x67, 0x38, 0x71, 0x04, + 0x72, 0xa1, 0xd2, 0xfd, 0xf8, 0xf0, 0xa7, 0x23, 0x24, 0x5b, 0x7d, 0xfb, 0x43, 0xba, 0x6c, 0xc4, + 0x83, 0x46, 0x0e, 0x4d, 0x6c, 0x92, 0xab, 0x4f, 0xd2, 0x70, 0x9d, 0xfe, 0xce, 0xf8, 0x05, 0x9f, + 0x98, 0x36, 0x9c, 0x90, 0x9a, 0xd0, 0xb5, 0x76, 0x16, 0xe8, 0x25, 0xc2, 0xbd, 0x91, 0xab, 0xf9, + 0x6f, 0x6c, 0xc5, 0x60, 0xe5, 0x30, 0xf2, 0xb7, 0x59, 0xc4, 0x9c, 0xdd, 0xdf, 0x04, 0x65, 0xd9, + }, .{ + 0xed, 0xe1, 0x8a, 0xf6, 0xf3, 0x8b, 0xfd, 0x1d, 0x3c, 0x87, 0xbf, 0xfe, 0x04, 0x52, 0x15, 0x82, + 0x0b, 0xb0, 0xcf, 0xcf, 0xf8, 0x03, 0x9c, 0xef, 0xc1, 0x76, 0x7e, 0xe3, 0xe9, 0xa8, 0x18, 0x90, + 0xd4, 0xc4, 0x91, 0x15, 0x68, 0x7f, 0x65, 0xd8, 0xe1, 0xb3, 0x23, 0xc2, 0x7d, 0x84, 0x3b, 0xaf, + 0x74, 0x69, 0x07, 0x2a, 0x1b, 0x5f, 0x0e, 0x44, 0x0d, 0x2b, 0x9c, 0x82, 0x41, 0xf9, 0x7f, 0xb5, + 0xc4, 0xd9, 0xcb, 0xd3, 0xc5, 0x31, 0x8b, 0x5f, 0xda, 0x09, 0x9b, 0x29, 0xa3, 0xb7, 0x13, 0x0d, + 0x55, 0x9b, 0x59, 0x33, 0x2a, 0x59, 0x3a, 0x44, 0x1f, 0xd3, 0x40, 0x4e, 0xde, 0x2c, 0xe4, 0x16, + 0xfd, 0xc3, 0x02, 0x74, 0xaa, 0x65, 0xfd, 0xc8, 0x2a, 0x8a, 0xdb, 0xae, 0x44, 0x28, 0x62, 0xa4, + 0x56, 0x4f, 0xf1, 0xaa, 0x0a, 0x0f, 0xdb, 0x1b, 0xc8, 0x45, 0x9b, 0x12, 0xb4, 0x1a, 0xe4, 0xa3, + }); + + try testType(@Vector(8, u16), .{ + 0xcf61, 0xb121, 0x3cf1, 0x3e9f, 0x43a7, 0x8d69, 0x96f5, 0xc11e, + }, .{ + 0xee30, 0x82f0, 0x270b, 0x1498, 0x4c60, 0x6e72, 0x0b64, 0x02d4, + }); + try testType(@Vector(16, u16), .{ + 0x9191, 0xd23e, 0xf844, 0xd84a, 0xe907, 0xf1e8, 0x712d, 0x90af, + 0x6541, 0x3fa6, 0x92eb, 0xe35a, 0xc0c9, 0xcb47, 0xb790, 0x4453, + }, .{ + 0x21c3, 0x4039, 0x9b71, 0x60bd, 0xcd7f, 0x2ec8, 0x50ba, 0xe810, + 0xebd4, 0x06e5, 0xed18, 0x2f66, 0x7e31, 0xe282, 0xad63, 0xb25e, + }); + try testType(@Vector(32, u16), .{ + 0x6b6a, 0x30a9, 0xc267, 0x2231, 0xbf4c, 0x00bc, 0x9c2c, 0x2928, + 0xecad, 0x82df, 0xcfb0, 0xa4e5, 0x909b, 0x1b05, 0xaf40, 0x1fd9, + 0xcec6, 0xd8dc, 0xd4b5, 0x6d59, 0x8e3f, 0x4d8a, 0xb83a, 0x808e, + 0x47e2, 0x5782, 0x59bf, 0xcefc, 0x5179, 0x3f48, 0x93dc, 0x66d2, + }, .{ + 0x1be8, 0xe98c, 0xf9b3, 0xb008, 0x2f8d, 0xf087, 0xc9b9, 0x75aa, + 0xbd16, 0x9540, 0xc5bd, 0x2b2c, 0xd43f, 0x9394, 0x3e1d, 0xf695, + 0x167d, 0xff7a, 0xf09d, 0xdff8, 0xdfa2, 0xc779, 0x70b7, 0x01bd, + 0x46b3, 0x995a, 0xb7bc, 0xa79d, 0x5542, 0x961e, 0x37cd, 0x9c2a, + }); + try testType(@Vector(64, u16), .{ + 0x6b87, 0xfd84, 0x436b, 0xe345, 0xfb82, 0x81fc, 0x0992, 0x45f9, + 0x5527, 0x1f6d, 0xda46, 0x6a16, 0xf6e1, 0x8fb7, 0x3619, 0xdfe3, + 0x64ce, 0x8ac6, 0x3ae8, 0x30e3, 0xec3b, 0x4ba7, 0x02a4, 0xa694, + 0x8e68, 0x8f0c, 0x5e30, 0x0e55, 0x6538, 0x9852, 0xea35, 0x7be2, + 0xdabd, 0x57e6, 0x5b38, 0x0fb2, 0x2604, 0x85e7, 0x6595, 0x8de9, + 0x49b1, 0xe9a2, 0x3758, 0xa4d9, 0x505b, 0xc9d3, 0xddc5, 0x9a43, + 0xfd44, 0x50f5, 0x379e, 0x03b6, 0x6375, 0x692f, 0x5586, 0xc717, + 0x94dd, 0xee06, 0xb32d, 0x0bb9, 0x0e35, 0x5f8f, 0x0ba4, 0x19a8, + }, .{ + 0xbeeb, 0x3e54, 0x6486, 0x5167, 0xe432, 0x57cf, 0x9cac, 0x922e, + 0xd2f8, 0x5614, 0x2e7f, 0x19cf, 0x9a07, 0x0524, 0x168f, 0x4464, + 0x4def, 0x83ce, 0x97b4, 0xf269, 0xda5f, 0x28c1, 0x9cc3, 0xfa7c, + 0x25a0, 0x912d, 0x25b2, 0xd60d, 0xcd82, 0x0e03, 0x40cc, 0xc9dc, + 0x18eb, 0xc609, 0xb06d, 0x29e0, 0xf3c7, 0x997b, 0x8ca2, 0xa750, + 0xc9bc, 0x8f0e, 0x3916, 0xd905, 0x94f8, 0x397f, 0x98b5, 0xc61d, + 0x05db, 0x3e7a, 0xf750, 0xe8de, 0x3225, 0x81d9, 0x612e, 0x0a7e, + 0x2c02, 0xff5b, 0x19ca, 0xbbf5, 0x870e, 0xc9ca, 0x47bb, 0xcfcc, + }); + + try testType(@Vector(4, u32), .{ + 0x234d576e, 0x4151cc9c, 0x39f558e4, 0xba935a32, + }, .{ + 0x398f2a9d, 0x4540f093, 0x9225551c, 
0x3bac865b, + }); + try testType(@Vector(8, u32), .{ + 0xb8336635, 0x2fc3182c, 0x27a00123, 0x71587fbe, + 0x9cbc65d2, 0x6f4bb0e6, 0x362594ce, 0x9971df38, + }, .{ + 0x5727e734, 0x972b0313, 0xff25f5dc, 0x924f8e55, + 0x04920a61, 0xa1c3b334, 0xf52df4b6, 0x5ef72ecc, + }); + try testType(@Vector(16, u32), .{ + 0xfb566f9e, 0x9ad4691a, 0x5b5f9ec0, 0x5a572d2a, + 0x8f2f226b, 0x2dfc7e33, 0x9fb07e32, 0x9d672a2e, + 0xbedc3cee, 0x6872428d, 0xbc73a9fd, 0xd4d5f055, + 0x69c1e9ee, 0x65038deb, 0x1449061a, 0x48412ec2, + }, .{ + 0x96cbe946, 0x3f24f60b, 0xaeacdc53, 0x7611a8b4, + 0x031a67a8, 0x52a26828, 0x75646f4b, 0xb75902c3, + 0x1f881f08, 0x834e02a4, 0x5e5b40eb, 0xc75c264d, + 0xa8251e09, 0x28e46bbd, 0x12cb1f31, 0x9a2af615, + }); + try testType(@Vector(32, u32), .{ + 0x131bbb7b, 0xa7311026, 0x9d5e59a0, 0x99b090d6, + 0xfe969e2e, 0x04547697, 0x357d3250, 0x43be6d7a, + 0x16ecf5c5, 0xf60febcc, 0x1d1e2602, 0x138a96d2, + 0x9117ba72, 0x9f185b32, 0xc10e23fd, 0x3e6b7fd8, + 0x4dc9be70, 0x2ee30047, 0xaffeab60, 0x7172d362, + 0x6154bfcf, 0x5388dc3e, 0xd6e5a76e, 0x8b782f2d, + 0xacbef4a2, 0x843aca71, 0x25d8ab5c, 0xe1a63a39, + 0xc26212e5, 0x0847b84b, 0xb53541e5, 0x0c8e44db, + }, .{ + 0x4ad92822, 0x715b623f, 0xa5bed8a7, 0x937447a9, + 0x7ecb38eb, 0x0a2f3dfc, 0x96f467a2, 0xec882793, + 0x41a8707f, 0xf7310656, 0x76217b80, 0x2058e5fc, + 0x26682154, 0x87313e31, 0x4bdc480a, 0x193572ff, + 0x60b03c75, 0x0fe45908, 0x56c73703, 0xdb86554c, + 0xdda2dd7d, 0x34371b27, 0xe4e6ad50, 0x422d1828, + 0x1de3801b, 0xdce268d3, 0x20af9ec8, 0x188a591f, + 0xf080e943, 0xc8718d14, 0x3f920382, 0x18d101b5, + }); + + // TODO: implement fallback for pcmpeqq + if (!comptime @import("std").Target.x86.featureSetHas(@import("builtin").cpu.features, .sse4_1)) return; + + try testType(@Vector(2, u64), .{ + 0x4cd89a317b03d430, 0x28998f61842f63a9, + }, .{ + 0x6c34db64af0e217e, 0x57aa5d02cd45dceb, + }); + try testType(@Vector(4, u64), .{ + 0x946cf7e7484691c9, 0xf4fc5be2a762fcbf, + 0x71cc83bc25abaf14, 0xc69cef44c6f833a1, + }, .{ + 0x9f90cbd6c3ce1d4e, 0x182f65295dff4e84, + 0x4dfe62c59fed0040, 0x18402347c1db1999, + }); + try testType(@Vector(8, u64), .{ + 0x92c6281333943e2c, 0xa97750504668efb5, + 0x234be51057c0181f, 0xefbc1f407f3df4fb, + 0x8da6cc7c39cebb94, 0xb408f7e56feee497, + 0x2363f1f8821592ed, 0x01716e800c0619e1, + }, .{ + 0xa617426684147e7e, 0x7542da7ebe093a7b, + 0x3f21d99ac57606b7, 0x65cd36d697d22de4, + 0xed23d6bdf176c844, 0x2d4573f100ff7b58, + 0x4968f4d21b49f8ab, 0xf5d9a205d453e933, + }); + try testType(@Vector(16, u64), .{ + 0x2f61a4ee66177b4a, 0xf13b286b279f6a93, + 0x36b46beb63665318, 0x74294dbde0da98d2, + 0x3aa872ba60b936eb, 0xe8f698b36e62600b, + 0x9e8930c21a6a1a76, 0x876998b09b8eb03c, + 0xa0244771a2ec0adb, 0xb4c72bff3d3ac1a2, + 0xd70677210830eced, 0x6622abc1734dd72d, + 0x157e2bb0d57d6596, 0x2aac8192fb7ef973, + 0xc4a0ca92f34d7b13, 0x04300f8ad1845246, + }, .{ + 0xeaf71dcf0eb76f5d, 0x0e84b1b63dc97139, + 0x0f64cc38d23c94a1, 0x12775cf0816349b7, + 0xfdcf13387ba48d54, 0xf8d3c672cacd8779, + 0xe728c1f5eb56ab1e, 0x05931a34877f7a69, + 0x1861a763c8dafd1f, 0x4ac97573ecd5739f, + 0x3384414c9bf77b8c, 0x32c15bbd04a5ddc4, + 0xbfd88aee1d82ed32, 0x20e91c15b701059a, + 0xed533d18f8657f3f, 0x1ddd7cd7f6bab957, + }); +} + +inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) { + return lhs & rhs; +} +test bitAnd { + try testBinary(bitAnd); +} + +inline fn bitOr(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs | rhs) { + return lhs | rhs; +} +test bitOr { + try testBinary(bitOr); +} + +inline fn bitXor(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs 
^ rhs) { + return lhs ^ rhs; +} +test bitXor { + try testBinary(bitXor); +} From b9c44007762b9b302c263a89b7c2e8a8470b97ec Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 21 Dec 2024 15:18:50 -0500 Subject: [PATCH 05/25] x86_64: implement fallback for pcmpeqq --- src/Type.zig | 36 +- src/arch/x86_64/CodeGen.zig | 1176 ++++++++++++++++++--------------- src/arch/x86_64/encoder.zig | 4 +- src/codegen/c/Type.zig | 10 +- src/codegen/llvm.zig | 2 +- src/link/C.zig | 2 +- test/behavior/align.zig | 32 +- test/behavior/x86_64/math.zig | 63 +- 8 files changed, 720 insertions(+), 605 deletions(-) diff --git a/src/Type.zig b/src/Type.zig index 350e3f10ec..2980034a34 100644 --- a/src/Type.zig +++ b/src/Type.zig @@ -962,7 +962,6 @@ pub fn abiAlignmentInner( ) SemaError!AbiAlignmentInner { const pt = strat.pt(zcu, tid); const target = zcu.getTarget(); - const use_llvm = zcu.comp.config.use_llvm; const ip = &zcu.intern_pool; switch (ty.toIntern()) { @@ -970,7 +969,7 @@ pub fn abiAlignmentInner( else => switch (ip.indexToKey(ty.toIntern())) { .int_type => |int_type| { if (int_type.bits == 0) return .{ .scalar = .@"1" }; - return .{ .scalar = intAbiAlignment(int_type.bits, target, use_llvm) }; + return .{ .scalar = intAbiAlignment(int_type.bits, target) }; }, .ptr_type, .anyframe_type => { return .{ .scalar = ptrAbiAlignment(target) }; @@ -1023,7 +1022,7 @@ pub fn abiAlignmentInner( .error_set_type, .inferred_error_set_type => { const bits = zcu.errorSetBits(); if (bits == 0) return .{ .scalar = .@"1" }; - return .{ .scalar = intAbiAlignment(bits, target, use_llvm) }; + return .{ .scalar = intAbiAlignment(bits, target) }; }, // represents machine code; not a pointer @@ -1036,7 +1035,7 @@ pub fn abiAlignmentInner( .usize, .isize, - => return .{ .scalar = intAbiAlignment(target.ptrBitWidth(), target, use_llvm) }, + => return .{ .scalar = intAbiAlignment(target.ptrBitWidth(), target) }, .c_char => return .{ .scalar = cTypeAlign(target, .char) }, .c_short => return .{ .scalar = cTypeAlign(target, .short) }, @@ -1067,7 +1066,7 @@ pub fn abiAlignmentInner( .anyerror, .adhoc_inferred_error_set => { const bits = zcu.errorSetBits(); if (bits == 0) return .{ .scalar = .@"1" }; - return .{ .scalar = intAbiAlignment(bits, target, use_llvm) }; + return .{ .scalar = intAbiAlignment(bits, target) }; }, .void, @@ -1291,7 +1290,6 @@ pub fn abiSizeInner( tid: strat.Tid(), ) SemaError!AbiSizeInner { const target = zcu.getTarget(); - const use_llvm = zcu.comp.config.use_llvm; const ip = &zcu.intern_pool; switch (ty.toIntern()) { @@ -1300,7 +1298,7 @@ pub fn abiSizeInner( else => switch (ip.indexToKey(ty.toIntern())) { .int_type => |int_type| { if (int_type.bits == 0) return .{ .scalar = 0 }; - return .{ .scalar = intAbiSize(int_type.bits, target, use_llvm) }; + return .{ .scalar = intAbiSize(int_type.bits, target) }; }, .ptr_type => |ptr_type| switch (ptr_type.flags.size) { .slice => return .{ .scalar = @divExact(target.ptrBitWidth(), 8) * 2 }, @@ -1362,7 +1360,7 @@ pub fn abiSizeInner( .error_set_type, .inferred_error_set_type => { const bits = zcu.errorSetBits(); if (bits == 0) return .{ .scalar = 0 }; - return .{ .scalar = intAbiSize(bits, target, use_llvm) }; + return .{ .scalar = intAbiSize(bits, target) }; }, .error_union_type => |error_union_type| { @@ -1455,7 +1453,7 @@ pub fn abiSizeInner( .anyerror, .adhoc_inferred_error_set => { const bits = zcu.errorSetBits(); if (bits == 0) return .{ .scalar = 0 }; - return .{ .scalar = intAbiSize(bits, target, use_llvm) }; + return .{ .scalar = intAbiSize(bits, target) }; }, 
.noreturn => unreachable, @@ -1609,11 +1607,11 @@ pub fn ptrAbiAlignment(target: Target) Alignment { return Alignment.fromNonzeroByteUnits(@divExact(target.ptrBitWidth(), 8)); } -pub fn intAbiSize(bits: u16, target: Target, use_llvm: bool) u64 { - return intAbiAlignment(bits, target, use_llvm).forward(@as(u16, @intCast((@as(u17, bits) + 7) / 8))); +pub fn intAbiSize(bits: u16, target: Target) u64 { + return intAbiAlignment(bits, target).forward(@as(u16, @intCast((@as(u17, bits) + 7) / 8))); } -pub fn intAbiAlignment(bits: u16, target: Target, use_llvm: bool) Alignment { +pub fn intAbiAlignment(bits: u16, target: Target) Alignment { return switch (target.cpu.arch) { .x86 => switch (bits) { 0 => .none, @@ -1632,19 +1630,16 @@ pub fn intAbiAlignment(bits: u16, target: Target, use_llvm: bool) Alignment { 9...16 => .@"2", 17...32 => .@"4", 33...64 => .@"8", - else => switch (target_util.zigBackend(target, use_llvm)) { - .stage2_x86_64 => .@"8", - else => .@"16", - }, + else => .@"16", }, else => return Alignment.fromByteUnits(@min( std.math.ceilPowerOfTwoPromote(u16, @as(u16, @intCast((@as(u17, bits) + 7) / 8))), - maxIntAlignment(target, use_llvm), + maxIntAlignment(target), )), }; } -pub fn maxIntAlignment(target: std.Target, use_llvm: bool) u16 { +pub fn maxIntAlignment(target: std.Target) u16 { return switch (target.cpu.arch) { .avr => 1, .msp430 => 2, @@ -1685,10 +1680,7 @@ pub fn maxIntAlignment(target: std.Target, use_llvm: bool) u16 { else => 8, }, - .x86_64 => switch (target_util.zigBackend(target, use_llvm)) { - .stage2_x86_64 => 8, - else => 16, - }, + .x86_64 => 16, // Even LLVMABIAlignmentOfType(i128) agrees on these targets. .x86, diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 51e4c5ce68..dffcbf7b78 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2097,11 +2097,11 @@ fn gen(self: *CodeGen) InnerError!void { // The address where to store the return value for the caller is in a // register which the callee is free to clobber. Therefore, we purposely // spill it to stack immediately. - const frame_index = try self.allocFrameIndex(.initSpill(Type.usize, zcu)); + const frame_index = try self.allocFrameIndex(.initSpill(.usize, zcu)); try self.genSetMem( .{ .frame = frame_index }, 0, - Type.usize, + .usize, self.ret_mcv.long.address().offset(-self.ret_mcv.short.indirect.off), .{}, ); @@ -2122,13 +2122,7 @@ fn gen(self: *CodeGen) InnerError!void { info.reg_save_area = .{ .index = reg_save_area_fi }; for (abi.SysV.c_abi_int_param_regs[info.gp_count..], info.gp_count..) 
|reg, reg_i| - try self.genSetMem( - .{ .frame = reg_save_area_fi }, - @intCast(reg_i * 8), - Type.usize, - .{ .register = reg }, - .{}, - ); + try self.genSetMem(.{ .frame = reg_save_area_fi }, @intCast(reg_i * 8), .usize, .{ .register = reg }, .{}); try self.asmRegisterImmediate(.{ ._, .cmp }, .al, .u(info.fp_count)); const skip_sse_reloc = try self.asmJccReloc(.na, undefined); @@ -2556,6 +2550,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } }, }, }, + .{ + .required_features = &.{.sse}, + .mir_tag = .{ ._ps, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .mem } }, + .{ .ops = &.{ .xmm, .mem, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .xmm } }, + .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } }, + }, + }, .{ .required_features = &.{.mmx}, .mir_tag = .{ .p_, mir_tag }, @@ -2615,6 +2619,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .xmm_limb } }, }, }, + .{ + .required_features = &.{.sse}, + .loop = .bitwise, + .mir_tag = .{ ._ps, mir_tag }, + .patterns = &.{ + .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, .{ .required_features = &.{.mmx}, .loop = .bitwise, @@ -3070,11 +3084,58 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, }, }, - }, .{ .invert_result = switch (cmp_op) { - .eq => false, - .neq => true, - else => unreachable, - } }), + .{ + .scalar = .{ .any_int = .byte }, + .clobbers = .{ .eflags = true }, + .loop = .elementwise, + .mir_tag = .{ ._, .cmp }, + .patterns = &.{ + .{ .ops = &.{ .cc_mask_limb, .mem_limb, .gpr_limb } }, + .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .mem_limb } }, + .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .gpr_limb } }, + }, + }, + .{ + .scalar = .{ .any_int = .word }, + .clobbers = .{ .eflags = true }, + .loop = .elementwise, + .mir_tag = .{ ._, .cmp }, + .patterns = &.{ + .{ .ops = &.{ .cc_mask_limb, .mem_limb, .gpr_limb } }, + .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .mem_limb } }, + .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .gpr_limb } }, + }, + }, + .{ + .scalar = .{ .any_int = .dword }, + .clobbers = .{ .eflags = true }, + .loop = .elementwise, + .mir_tag = .{ ._, .cmp }, + .patterns = &.{ + .{ .ops = &.{ .cc_mask_limb, .mem_limb, .gpr_limb } }, + .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .mem_limb } }, + .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .gpr_limb } }, + }, + }, + .{ + .scalar = .{ .any_int = .qword }, + .clobbers = .{ .eflags = true }, + .loop = .elementwise, + .mir_tag = .{ ._, .cmp }, + .patterns = &.{ + .{ .ops = &.{ .cc_mask_limb, .mem_limb, .gpr_limb } }, + .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .mem_limb } }, + .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .gpr_limb } }, + }, + }, + }, .{ + .cc = .e, + .invert_result = switch (cmp_op) { + .eq => false, + .neq => true, + else => unreachable, + }, + }), .gte => unreachable, .gt => unreachable, } @@ -3242,7 +3303,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const opt_child_ty = opt_ty.optionalChild(zcu); const opt_child_abi_size: i32 = @intCast(opt_child_ty.abiSize(zcu)); try ops[0].toOffset(opt_child_abi_size, cg); - var has_value = try cg.tempFromValue(Type.bool, .{ .immediate = 1 }); + var has_value = try cg.tempFromValue(.bool, .{ .immediate 
= 1 }); try ops[0].store(&has_value, cg); try has_value.die(cg); try ops[0].toOffset(-opt_child_abi_size, cg); @@ -3426,7 +3487,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .array_to_slice => if (use_old) try cg.airArrayToSlice(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); - var len = try cg.tempFromValue(Type.usize, .{ + var len = try cg.tempFromValue(.usize, .{ .immediate = cg.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu), }); try ops[0].toPair(&len, cg); @@ -3510,17 +3571,11 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { try self.genSetMem( .{ .reg = ret_reg }, 0, - Type.usize, + .usize, .{ .register_offset = .{ .reg = data_reg, .off = data_off } }, .{}, ); - try self.genSetMem( - .{ .reg = ret_reg }, - 8, - Type.usize, - .{ .immediate = tag_name_len }, - .{}, - ); + try self.genSetMem(.{ .reg = ret_reg }, 8, .usize, .{ .immediate = tag_name_len }, .{}); exitlude_jump_reloc.* = try self.asmJmpReloc(undefined); self.performReloc(skip_reloc); @@ -4495,8 +4550,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { high_mcv.getReg().? else try self.copyToTmpRegister(switch (src_int_info.signedness) { - .signed => Type.isize, - .unsigned => Type.usize, + .signed => .isize, + .unsigned => .usize, }, high_mcv); const high_lock = self.register_manager.lockRegAssumeUnused(high_reg); defer self.register_manager.unlockReg(high_lock); @@ -4504,7 +4559,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { const high_bits = src_int_info.bits % 64; if (high_bits > 0) { try self.truncateRegister(src_ty, high_reg); - const high_ty = if (dst_int_info.bits >= 64) Type.usize else dst_ty; + const high_ty: Type = if (dst_int_info.bits >= 64) .usize else dst_ty; try self.genCopy(high_ty, high_mcv, .{ .register = high_reg }, .{}); } @@ -4513,13 +4568,7 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { switch (extend) { .signed => extend: { const extend_mcv = MCValue{ .register = high_reg }; - try self.genShiftBinOpMir( - .{ ._r, .sa }, - Type.isize, - extend_mcv, - Type.u8, - .{ .immediate = 63 }, - ); + try self.genShiftBinOpMir(.{ ._r, .sa }, .isize, extend_mcv, .u8, .{ .immediate = 63 }); break :extend extend_mcv; }, .unsigned => .{ .immediate = 0 }, @@ -4621,7 +4670,7 @@ fn airTrunc(self: *CodeGen, inst: Air.Inst.Index) !void { const splat_mcv = try self.genTypedValue(.fromInterned(splat_val)); const splat_addr_mcv: MCValue = switch (splat_mcv) { .memory, .indirect, .load_frame => splat_mcv.address(), - else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) }, + else => .{ .register = try self.copyToTmpRegister(.usize, splat_mcv.address()) }, }; const dst_reg = dst_mcv.getReg().?; @@ -4837,7 +4886,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { state: State, reloc: Mir.Inst.Index, } = if (signed and tag == .div_floor) state: { - const frame_index = try self.allocFrameIndex(.initType(Type.usize, zcu)); + const frame_index = try self.allocFrameIndex(.initType(.usize, zcu)); try self.asmMemoryImmediate( .{ ._, .mov }, .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } }, @@ -4852,7 +4901,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { const mat_lhs_mcv = switch (lhs_mcv) { .load_symbol => mat_lhs_mcv: { // TODO clean this up! 
- const addr_reg = try self.copyToTmpRegister(Type.usize, lhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => lhs_mcv, @@ -4876,7 +4925,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { const mat_rhs_mcv = switch (rhs_mcv) { .load_symbol => mat_rhs_mcv: { // TODO clean this up! - const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => rhs_mcv, @@ -4975,7 +5024,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { const mat_rhs_mcv = switch (rhs_mcv) { .load_symbol => mat_rhs_mcv: { // TODO clean this up! - const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => rhs_mcv, @@ -5062,22 +5111,10 @@ fn airAddSat(self: *CodeGen, inst: Air.Inst.Index) !void { const reg_extra_bits = self.regExtraBits(ty); const cc: Condition = if (ty.isSignedInt(zcu)) cc: { if (reg_extra_bits > 0) { - try self.genShiftBinOpMir( - .{ ._l, .sa }, - ty, - dst_mcv, - Type.u8, - .{ .immediate = reg_extra_bits }, - ); + try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits }); } try self.genSetReg(limit_reg, ty, dst_mcv, .{}); - try self.genShiftBinOpMir( - .{ ._r, .sa }, - ty, - limit_mcv, - Type.u8, - .{ .immediate = reg_bits - 1 }, - ); + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 }); try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1, }); @@ -5087,13 +5124,7 @@ fn airAddSat(self: *CodeGen, inst: Air.Inst.Index) !void { const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg); defer self.register_manager.unlockReg(shifted_rhs_lock); - try self.genShiftBinOpMir( - .{ ._l, .sa }, - ty, - shifted_rhs_mcv, - Type.u8, - .{ .immediate = reg_extra_bits }, - ); + try self.genShiftBinOpMir(.{ ._l, .sa }, ty, shifted_rhs_mcv, .u8, .{ .immediate = reg_extra_bits }); try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, shifted_rhs_mcv); } else try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv); break :cc .o; @@ -5117,13 +5148,8 @@ fn airAddSat(self: *CodeGen, inst: Air.Inst.Index) !void { registerAlias(limit_reg, cmov_abi_size), ); - if (reg_extra_bits > 0 and ty.isSignedInt(zcu)) try self.genShiftBinOpMir( - .{ ._r, .sa }, - ty, - dst_mcv, - Type.u8, - .{ .immediate = reg_extra_bits }, - ); + if (reg_extra_bits > 0 and ty.isSignedInt(zcu)) + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits }); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -5163,22 +5189,10 @@ fn airSubSat(self: *CodeGen, inst: Air.Inst.Index) !void { const reg_extra_bits = self.regExtraBits(ty); const cc: Condition = if (ty.isSignedInt(zcu)) cc: { if (reg_extra_bits > 0) { - try self.genShiftBinOpMir( - .{ ._l, .sa }, - ty, - dst_mcv, - Type.u8, - .{ .immediate = reg_extra_bits }, - ); + try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits }); } try self.genSetReg(limit_reg, ty, dst_mcv, .{}); - try self.genShiftBinOpMir( - .{ ._r, .sa }, - ty, - limit_mcv, - Type.u8, - .{ .immediate = reg_bits - 1 }, - 
); + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 }); try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1, }); @@ -5188,13 +5202,7 @@ fn airSubSat(self: *CodeGen, inst: Air.Inst.Index) !void { const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg); defer self.register_manager.unlockReg(shifted_rhs_lock); - try self.genShiftBinOpMir( - .{ ._l, .sa }, - ty, - shifted_rhs_mcv, - Type.u8, - .{ .immediate = reg_extra_bits }, - ); + try self.genShiftBinOpMir(.{ ._l, .sa }, ty, shifted_rhs_mcv, .u8, .{ .immediate = reg_extra_bits }); try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, shifted_rhs_mcv); } else try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv); break :cc .o; @@ -5211,13 +5219,8 @@ fn airSubSat(self: *CodeGen, inst: Air.Inst.Index) !void { registerAlias(limit_reg, cmov_abi_size), ); - if (reg_extra_bits > 0 and ty.isSignedInt(zcu)) try self.genShiftBinOpMir( - .{ ._r, .sa }, - ty, - dst_mcv, - Type.u8, - .{ .immediate = reg_extra_bits }, - ); + if (reg_extra_bits > 0 and ty.isSignedInt(zcu)) + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits }); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -5230,14 +5233,14 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { const result = result: { if (ty.toIntern() == .i128_type) { - const ptr_c_int = try pt.singleMutPtrType(Type.c_int); - const overflow = try self.allocTempRegOrMem(Type.c_int, false); + const ptr_c_int = try pt.singleMutPtrType(.c_int); + const overflow = try self.allocTempRegOrMem(.c_int, false); const dst_mcv = try self.genCall(.{ .lib = .{ .return_type = .i128_type, .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() }, .callee = "__muloti4", - } }, &.{ Type.i128, Type.i128, ptr_c_int }, &.{ + } }, &.{ .i128, .i128, ptr_c_int }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs }, overflow.address(), @@ -5253,7 +5256,7 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { const mat_lhs_mcv = switch (lhs_mcv) { .load_symbol => mat_lhs_mcv: { // TODO clean this up! - const addr_reg = try self.copyToTmpRegister(Type.usize, lhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => lhs_mcv, @@ -5277,7 +5280,7 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { const mat_rhs_mcv = switch (rhs_mcv) { .load_symbol => mat_rhs_mcv: { // TODO clean this up! 
- const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => rhs_mcv, @@ -5339,13 +5342,7 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { const cc: Condition = if (ty.isSignedInt(zcu)) cc: { try self.genSetReg(limit_reg, ty, lhs_mcv, .{}); try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv); - try self.genShiftBinOpMir( - .{ ._r, .sa }, - ty, - limit_mcv, - Type.u8, - .{ .immediate = reg_bits - 1 }, - ); + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 }); try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1, }); @@ -5410,7 +5407,7 @@ fn airAddSubWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetMem( .{ .frame = frame_index }, @intCast(tuple_ty.structFieldOffset(1, zcu)), - Type.u1, + .u1, .{ .eflags = cc }, .{}, ); @@ -5558,12 +5555,7 @@ fn genSetFrameTruncatedOverflowCompare( const eq_reg = temp_regs[2]; if (overflow_cc) |_| { try self.asmSetccRegister(.ne, eq_reg.to8()); - try self.genBinOpMir( - .{ ._, .@"or" }, - Type.u8, - .{ .register = overflow_reg }, - .{ .register = eq_reg }, - ); + try self.genBinOpMir(.{ ._, .@"or" }, .u8, .{ .register = overflow_reg }, .{ .register = eq_reg }); } const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, zcu)); @@ -5743,13 +5735,13 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { if (src_bits > 64 and src_bits <= 128 and dst_info.bits > 64 and dst_info.bits <= 128) switch (dst_info.signedness) { .signed => { - const ptr_c_int = try pt.singleMutPtrType(Type.c_int); - const overflow = try self.allocTempRegOrMem(Type.c_int, false); + const ptr_c_int = try pt.singleMutPtrType(.c_int); + const overflow = try self.allocTempRegOrMem(.c_int, false); const result = try self.genCall(.{ .lib = .{ .return_type = .i128_type, .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() }, .callee = "__muloti4", - } }, &.{ Type.i128, Type.i128, ptr_c_int }, &.{ + } }, &.{ .i128, .i128, ptr_c_int }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs }, overflow.address(), @@ -5765,7 +5757,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { ); try self.asmMemoryImmediate( .{ ._, .cmp }, - try overflow.mem(self, .{ .size = self.memSize(Type.c_int) }), + try overflow.mem(self, .{ .size = self.memSize(.c_int) }), .s(0), ); try self.genSetMem( @@ -5794,7 +5786,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { const mat_lhs_mcv = switch (lhs_mcv) { .load_symbol => mat_lhs_mcv: { // TODO clean this up! - const addr_reg = try self.copyToTmpRegister(Type.usize, lhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => lhs_mcv, @@ -5807,7 +5799,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { const mat_rhs_mcv = switch (rhs_mcv) { .load_symbol => mat_rhs_mcv: { // TODO clean this up! 
- const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => rhs_mcv, @@ -6100,7 +6092,7 @@ fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(tmp_lock); const lhs_bits: u31 = @intCast(lhs_ty.bitSize(zcu)); - const tmp_ty = if (lhs_bits > 64) Type.usize else lhs_ty; + const tmp_ty: Type = if (lhs_bits > 64) .usize else lhs_ty; const off = frame_addr.off + (lhs_bits - 1) / 64 * 8; try self.genSetReg( tmp_reg, @@ -6283,13 +6275,12 @@ fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { const mask_mcv = try self.genTypedValue(.fromInterned(try pt.intern(.{ .aggregate = .{ .ty = mask_ty.toIntern(), .storage = .{ .elems = &([1]InternPool.Index{ - (try rhs_ty.childType(zcu).maxIntScalar(pt, Type.u8)).toIntern(), + (try rhs_ty.childType(zcu).maxIntScalar(pt, .u8)).toIntern(), } ++ [1]InternPool.Index{ - (try pt.intValue(Type.u8, 0)).toIntern(), + (try pt.intValue(.u8, 0)).toIntern(), } ** 15) }, } }))); - const mask_addr_reg = - try self.copyToTmpRegister(Type.usize, mask_mcv.address()); + const mask_addr_reg = try self.copyToTmpRegister(.usize, mask_mcv.address()); const mask_addr_lock = self.register_manager.lockRegAssumeUnused(mask_addr_reg); defer self.register_manager.unlockReg(mask_addr_lock); @@ -6423,7 +6414,7 @@ fn airOptionalPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetMem( .{ .reg = dst_mcv.getReg().? }, pl_abi_size, - Type.bool, + .bool, .{ .immediate = 1 }, .{}, ); @@ -6462,9 +6453,9 @@ fn airUnwrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._r, .sh }, err_union_ty, result, - Type.u8, + .u8, .{ .immediate = @as(u6, @intCast(err_off * 8)) }, - ) else try self.truncateRegister(Type.anyerror, result.register); + ) else try self.truncateRegister(.anyerror, result.register); break :result result; }, .load_frame => |frame_addr| break :result .{ .load_frame = .{ @@ -6623,7 +6614,7 @@ fn genUnwrapErrUnionPayloadMir( .{ ._r, .sh }, err_union_ty, result_mcv, - Type.u8, + .u8, .{ .immediate = @as(u6, @intCast(payload_off * 8)) }, ) else try self.truncateRegister(payload_ty, result_mcv.register); break :result if (payload_in_gp) @@ -6929,7 +6920,7 @@ fn genSliceElemPtr(self: *CodeGen, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValu defer self.register_manager.unlockReg(offset_reg_lock); const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - try self.genSetReg(addr_reg, Type.usize, slice_mcv, .{}); + try self.genSetReg(addr_reg, .usize, slice_mcv, .{}); // TODO we could allocate register here, but need to expect addr register and potentially // offset register. 
try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{ @@ -7064,7 +7055,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._, .bt }, .{ .base = .{ - .reg = try self.copyToTmpRegister(Type.usize, array_mat_mcv.address()), + .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()), }, .mod = .{ .rm = .{ .size = .qword } }, }, @@ -7074,7 +7065,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._, .bt }, .{ .base = .{ - .reg = try self.copyToTmpRegister(Type.usize, array_mat_mcv.address()), + .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()), }, .mod = .{ .rm = .{ .size = .qword } }, }, @@ -7122,7 +7113,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .load_direct, .load_got, .load_tlv, - => try self.genSetReg(addr_reg, Type.usize, array_mcv.address(), .{}), + => try self.genSetReg(addr_reg, .usize, array_mcv.address(), .{}), .lea_symbol, .lea_direct, .lea_tlv => unreachable, else => return self.fail("TODO airArrayElemVal_val for {s} of {}", .{ @tagName(array_mcv), array_ty.fmt(pt), @@ -7136,12 +7127,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { // TODO we could allocate register here, but need to expect addr register and potentially // offset register. const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genBinOpMir( - .{ ._, .add }, - Type.usize, - .{ .register = addr_reg }, - .{ .register = offset_reg }, - ); + try self.genBinOpMir(.{ ._, .add }, .usize, .{ .register = addr_reg }, .{ .register = offset_reg }); try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }, .{}); break :result dst_mcv; }; @@ -7329,13 +7315,7 @@ fn airGetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void { .register => { const shift: u6 = @intCast(layout.tagOffset() * 8); const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand); - try self.genShiftBinOpMir( - .{ ._r, .sh }, - Type.usize, - result, - Type.u8, - .{ .immediate = shift }, - ); + try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, result, .u8, .{ .immediate = shift }); break :blk MCValue{ .register = registerAlias(result.register, @intCast(layout.tag_size)), }; @@ -7444,7 +7424,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { if (src_bits <= 8) { const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._, .lzcnt }, Type.u32, dst_mcv, .{ .register = wide_reg }); + try self.genBinOpMir(.{ ._, .lzcnt }, .u32, dst_mcv, .{ .register = wide_reg }); try self.genBinOpMir( .{ ._, .sub }, dst_ty, @@ -7464,25 +7444,15 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.genBinOpMir( - .{ ._, .lzcnt }, - Type.u64, - dst_mcv, - if (mat_src_mcv.isBase()) - mat_src_mcv - else - .{ .register = mat_src_mcv.register_pair[0] }, - ); + try self.genBinOpMir(.{ ._, .lzcnt }, .u64, dst_mcv, if (mat_src_mcv.isBase()) + mat_src_mcv + else + .{ .register = mat_src_mcv.register_pair[0] }); try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); - try self.genBinOpMir( - .{ ._, .lzcnt }, - Type.u64, - tmp_mcv, - if (mat_src_mcv.isBase()) - mat_src_mcv.address().offset(8).deref() - else - .{ .register = mat_src_mcv.register_pair[1] }, - ); + try self.genBinOpMir(.{ ._, .lzcnt }, .u64, tmp_mcv, if (mat_src_mcv.isBase()) + 
mat_src_mcv.address().offset(8).deref() + else + .{ .register = mat_src_mcv.register_pair[1] }); try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32()); if (src_bits < 128) try self.genBinOpMir( @@ -7510,7 +7480,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(wide_lock); try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._, .bsr }, Type.u16, dst_mcv, .{ .register = wide_reg }); + try self.genBinOpMir(.{ ._, .bsr }, .u16, dst_mcv, .{ .register = wide_reg }); } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); try self.asmCmovccRegisterRegister( @@ -7534,7 +7504,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { try self.truncateRegister(src_ty, wide_reg); try self.genBinOpMir( .{ ._, .bsr }, - if (src_bits <= 8) Type.u16 else src_ty, + if (src_bits <= 8) .u16 else src_ty, dst_mcv, .{ .register = wide_reg }, ); @@ -7645,7 +7615,7 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { break :result dst_mcv; } - const wide_ty = if (src_bits <= 8) Type.u16 else src_ty; + const wide_ty: Type = if (src_bits <= 8) .u16 else src_ty; if (has_bmi) { if (src_bits <= 64) { const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0); @@ -7682,18 +7652,18 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { else .{ .register = mat_src_mcv.register_pair[1] }; const masked_mcv = if (src_bits < 128) masked: { - try self.genCopy(Type.u64, dst_mcv, hi_mat_src_mcv, .{}); + try self.genCopy(.u64, dst_mcv, hi_mat_src_mcv, .{}); try self.genBinOpMir( .{ ._, .@"or" }, - Type.u64, + .u64, dst_mcv, .{ .immediate = @as(u64, std.math.maxInt(u64)) << @intCast(src_bits - 64) }, ); break :masked dst_mcv; } else hi_mat_src_mcv; - try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, dst_mcv, masked_mcv); + try self.genBinOpMir(.{ ._, .tzcnt }, .u64, dst_mcv, masked_mcv); try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); - try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, tmp_mcv, lo_mat_src_mcv); + try self.genBinOpMir(.{ ._, .tzcnt }, .u64, tmp_mcv, lo_mat_src_mcv); try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32()); } break :result dst_mcv; @@ -7766,7 +7736,7 @@ fn airPopCount(self: *CodeGen, inst: Air.Inst.Index) !void { const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs); defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); - try self.genPopCount(tmp_regs[0], Type.usize, if (mat_src_mcv.isBase()) + try self.genPopCount(tmp_regs[0], .usize, if (mat_src_mcv.isBase()) mat_src_mcv else .{ .register = mat_src_mcv.register_pair[0] }, false); @@ -7794,7 +7764,7 @@ fn genPopCount( const src_abi_size: u32 = @intCast(src_ty.abiSize(pt.zcu)); if (self.hasFeature(.popcnt)) return self.genBinOpMir( .{ ._, .popcnt }, - if (src_abi_size > 1) src_ty else Type.u32, + if (src_abi_size > 1) src_ty else .u32, .{ .register = dst_reg }, if (src_abi_size > 1) src_mcv else src: { if (!dst_contains_src) try self.genSetReg(dst_reg, src_ty, src_mcv, .{}); @@ -8057,7 +8027,7 @@ fn airByteSwap(self: *CodeGen, inst: Air.Inst.Index) !void { } }, src_ty, dst_mcv, - if (src_bits > 256) Type.u16 else Type.u8, + if (src_bits > 256) .u16 else .u8, .{ .immediate = src_ty.abiSize(zcu) * 8 - src_bits }, ); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); @@ -8181,7 +8151,7 @@ fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void { if (extra_bits > 0) try self.genShiftBinOpMir(switch (signedness) { 
.signed => .{ ._r, .sa }, .unsigned => .{ ._r, .sh }, - }, src_ty, dst_mcv, Type.u8, .{ .immediate = extra_bits }); + }, src_ty, dst_mcv, .u8, .{ .immediate = extra_bits }); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } @@ -8252,7 +8222,7 @@ fn floatSign(self: *CodeGen, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Ty try sign_mcv.mem(self, .{ .size = .fromSize(abi_size) }) else .{ - .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) }, + .base = .{ .reg = try self.copyToTmpRegister(.usize, sign_mcv.address()) }, .mod = .{ .rm = .{ .size = .fromSize(abi_size) } }, }; @@ -8947,9 +8917,9 @@ fn packedLoad(self: *CodeGen, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) defer self.register_manager.unlockReg(tmp_lock); const hi_mcv = dst_mcv.address().offset(@intCast(val_bit_size / 64 * 8)).deref(); - try self.genSetReg(tmp_reg, Type.usize, hi_mcv, .{}); + try self.genSetReg(tmp_reg, .usize, hi_mcv, .{}); try self.truncateRegister(val_ty, tmp_reg); - try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg }, .{}); + try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{}); } } return; @@ -9104,13 +9074,13 @@ fn airLoad(self: *CodeGen, inst: Air.Inst.Index) !void { const high_reg = if (high_mcv.isRegister()) high_mcv.getReg().? else - try self.copyToTmpRegister(Type.usize, high_mcv); + try self.copyToTmpRegister(.usize, high_mcv); const high_lock = self.register_manager.lockReg(high_reg); defer if (high_lock) |lock| self.register_manager.unlockReg(lock); try self.truncateRegister(elem_ty, high_reg); if (!high_mcv.isRegister()) try self.genCopy( - if (elem_size <= 8) elem_ty else Type.usize, + if (elem_size <= 8) elem_ty else .usize, high_mcv, .{ .register = high_reg }, .{}, @@ -9183,14 +9153,14 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) .{ ._l, .sh }, limb_ty, tmp_mcv, - Type.u8, + .u8, .{ .immediate = src_bit_off }, ), 1 => try self.genShiftBinOpMir( .{ ._r, .sh }, limb_ty, tmp_mcv, - Type.u8, + .u8, .{ .immediate = limb_abi_bits - src_bit_off }, ), else => unreachable, @@ -9355,7 +9325,10 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { const src_mcv = try self.resolveInst(operand); const field_off: u32 = switch (container_ty.containerLayout(zcu)) { .auto, .@"extern" => @intCast(container_ty.structFieldOffset(extra.field_index, zcu) * 8), - .@"packed" => if (zcu.typeToStruct(container_ty)) |struct_obj| pt.structPackedFieldBitOffset(struct_obj, extra.field_index) else 0, + .@"packed" => if (zcu.typeToStruct(container_ty)) |struct_obj| + pt.structPackedFieldBitOffset(struct_obj, extra.field_index) + else + 0, }; switch (src_mcv) { @@ -9370,20 +9343,14 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { else if (field_off == 0) (try self.copyToRegisterWithInstTracking(inst, field_ty, src_mcv)).register else - try self.copyToTmpRegister(Type.usize, .{ .register = src_reg }); + try self.copyToTmpRegister(.usize, .{ .register = src_reg }); const dst_mcv: MCValue = .{ .register = dst_reg }; const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); if (field_off > 0) { try self.spillEflagsIfOccupied(); - try self.genShiftBinOpMir( - .{ ._r, .sh }, - Type.usize, - dst_mcv, - Type.u8, - .{ .immediate = field_off }, - ); + try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, dst_mcv, .u8, .{ .immediate = field_off }); } if 
(abi.RegisterClass.gp.isSet(RegisterManager.indexOfRegIntoTracked(dst_reg).?) and container_ty.abiSize(zcu) * 8 > field_ty.bitSize(zcu)) @@ -9421,13 +9388,7 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { if (field_off > 0) { try self.spillEflagsIfOccupied(); - try self.genShiftBinOpMir( - .{ ._r, .sh }, - Type.u128, - dst_mcv, - Type.u8, - .{ .immediate = field_off }, - ); + try self.genShiftBinOpMir(.{ ._r, .sh }, .u128, dst_mcv, .u8, .{ .immediate = field_off }); } if (field_bit_size <= 64) { @@ -9451,20 +9412,14 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); }; - const dst_reg = try self.copyToTmpRegister(Type.usize, .{ .register = src_reg }); + const dst_reg = try self.copyToTmpRegister(.usize, .{ .register = src_reg }); const dst_mcv = MCValue{ .register = dst_reg }; const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); if (field_off % 64 > 0) { try self.spillEflagsIfOccupied(); - try self.genShiftBinOpMir( - .{ ._r, .sh }, - Type.usize, - dst_mcv, - Type.u8, - .{ .immediate = field_off % 64 }, - ); + try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, dst_mcv, .u8, .{ .immediate = field_off % 64 }); } if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(field_ty, dst_reg); @@ -9479,11 +9434,7 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { 0 => if (self.reuseOperand(inst, extra.struct_operand, 0, src_mcv)) { self.eflags_inst = null; // actually stop tracking the overflow part break :result .{ .register = ro.reg }; - } else break :result try self.copyToRegisterWithInstTracking( - inst, - Type.usize, - .{ .register = ro.reg }, - ), + } else break :result try self.copyToRegisterWithInstTracking(inst, .usize, .{ .register = ro.reg }), // Get overflow bit. 
1 => if (self.reuseOperandAdvanced(inst, extra.struct_operand, 0, src_mcv, null)) { self.eflags_inst = inst; // actually keep tracking the overflow part @@ -9541,9 +9492,9 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { const hi_mcv = dst_mcv.address().offset(@intCast(field_bit_size / 64 * 8)).deref(); - try self.genSetReg(tmp_reg, Type.usize, hi_mcv, .{}); + try self.genSetReg(tmp_reg, .usize, hi_mcv, .{}); try self.truncateRegister(field_ty, tmp_reg); - try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg }, .{}); + try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{}); } break :result dst_mcv; } @@ -9684,8 +9635,8 @@ fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_a switch (tag) { .not => { const limb_abi_size: u16 = @min(abi_size, 8); - const int_info = if (src_ty.ip_index == .bool_type) - std.builtin.Type.Int{ .signedness = .unsigned, .bits = 1 } + const int_info: InternPool.Key.IntType = if (src_ty.ip_index == .bool_type) + .{ .signedness = .unsigned, .bits = 1 } else src_ty.intInfo(zcu); var byte_off: i32 = 0; @@ -9718,9 +9669,9 @@ fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_a defer self.register_manager.unlockReg(tmp_lock); const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref(); - try self.genSetReg(tmp_reg, Type.usize, hi_mcv, .{}); + try self.genSetReg(tmp_reg, .usize, hi_mcv, .{}); try self.truncateRegister(src_ty, tmp_reg); - try self.genCopy(Type.usize, hi_mcv, .{ .register = tmp_reg }, .{}); + try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{}); } } }, @@ -9759,7 +9710,7 @@ fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_reg_lock); - try self.genSetReg(addr_reg, Type.usize, dst_mcv.address(), .{}); + try self.genSetReg(addr_reg, .usize, dst_mcv.address(), .{}); try self.asmMemory(mir_tag, .{ .base = .{ .reg = addr_reg }, .mod = .{ .rm = .{ .size = .fromSize(abi_size), } } }); @@ -10495,7 +10446,7 @@ fn genMulDivBinOp( const mat_lhs_mcv = switch (lhs_mcv) { .load_symbol => mat_lhs_mcv: { // TODO clean this up! - const addr_reg = try self.copyToTmpRegister(Type.usize, lhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => lhs_mcv, @@ -10508,7 +10459,7 @@ fn genMulDivBinOp( const mat_rhs_mcv = switch (rhs_mcv) { .load_symbol => mat_rhs_mcv: { // TODO clean this up! 
- const addr_reg = try self.copyToTmpRegister(Type.usize, rhs_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => rhs_mcv, @@ -10696,7 +10647,7 @@ fn genMulDivBinOp( manyptr_u32_ty, manyptr_const_u32_ty, manyptr_const_u32_ty, - Type.usize, + .usize, }, &.{ dst_mcv.address(), lhs_mcv.address(), @@ -12493,7 +12444,7 @@ fn genBinOp( try not_mcv.mem(self, .{ .size = .fromSize(abi_size) }) else .{ .base = .{ - .reg = try self.copyToTmpRegister(Type.usize, not_mcv.address()), + .reg = try self.copyToTmpRegister(.usize, not_mcv.address()), }, .mod = .{ .rm = .{ .size = .fromSize(abi_size) } } }; switch (mir_tag[0]) { .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory( @@ -12757,7 +12708,7 @@ fn genBinOpMir( const dst_addr_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg); errdefer self.register_manager.unlockReg(dst_addr_lock); - try self.genSetReg(dst_addr_reg, Type.usize, dst_mcv.address(), .{}); + try self.genSetReg(dst_addr_reg, .usize, dst_mcv.address(), .{}); break :dst .{ .addr_reg = dst_addr_reg, .addr_lock = dst_addr_lock }; }, .load_frame => null, @@ -12808,7 +12759,7 @@ fn genBinOpMir( const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); errdefer self.register_manager.unlockReg(src_addr_lock); - try self.genSetReg(src_addr_reg, Type.usize, resolved_src_mcv.address(), .{}); + try self.genSetReg(src_addr_reg, .usize, resolved_src_mcv.address(), .{}); break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock }; }, }; @@ -12816,9 +12767,9 @@ fn genBinOpMir( const ty_signedness = if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned; - const limb_ty = if (abi_size <= 8) ty else switch (ty_signedness) { - .signed => Type.usize, - .unsigned => Type.isize, + const limb_ty: Type = if (abi_size <= 8) ty else switch (ty_signedness) { + .signed => .usize, + .unsigned => .isize, }; var limb_i: usize = 0; var off: i32 = 0; @@ -13310,13 +13261,13 @@ fn genLocalDebugInfo( fn airRetAddr(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(Type.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } }, .{}); + try self.genCopy(.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } }, .{}); return self.finishAir(inst, dst_mcv, .{ .none, .none, .none }); } fn airFrameAddress(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(Type.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } }, .{}); + try self.genCopy(.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } }, .{}); return self.finishAir(inst, dst_mcv, .{ .none, .none, .none }); } @@ -13461,13 +13412,10 @@ fn genCall(self: *CodeGen, info: union(enum) { defer self.register_manager.unlockReg(index_lock); const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{ - .base = .{ .reg = try self.copyToTmpRegister( - Type.usize, - switch (src_arg) { - else => src_arg, - .air_ref => |src_ref| try self.resolveInst(src_ref), - }.address(), - ) }, + .base = .{ .reg = try self.copyToTmpRegister(.usize, switch (src_arg) { + else => src_arg, + .air_ref => |src_ref| try self.resolveInst(src_ref), + }.address()) }, .mod = .{ .rm = .{ .size = .dword } }, }; const src_lock = switch (src_mem.base) { @@ -13519,7 +13467,7 @@ fn genCall(self: *CodeGen, info: union(enum) { .indirect => |reg_off| { const ret_ty: Type = 
.fromInterned(fn_info.return_type); const frame_index = try self.allocFrameIndex(.initSpill(ret_ty, zcu)); - try self.genSetReg(reg_off.reg, Type.usize, .{ + try self.genSetReg(reg_off.reg, .usize, .{ .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, }, .{}); call_info.return_value.short = .{ .load_frame = .{ .index = frame_index } }; @@ -13548,13 +13496,13 @@ fn genCall(self: *CodeGen, info: union(enum) { }, }, .register_pair => try self.genCopy(arg_ty, dst_arg, src_arg, .{}), - .indirect => |reg_off| try self.genSetReg(reg_off.reg, Type.usize, .{ + .indirect => |reg_off| try self.genSetReg(reg_off.reg, .usize, .{ .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, }, .{}), .elementwise_regs_then_frame => |regs_frame_addr| { const src_mem: Memory = if (src_arg.isBase()) try src_arg.mem(self, .{ .size = .dword }) else .{ .base = .{ .reg = try self.copyToTmpRegister( - Type.usize, + .usize, switch (src_arg) { else => src_arg, .air_ref => |src_ref| try self.resolveInst(src_ref), @@ -13604,7 +13552,7 @@ fn genCall(self: *CodeGen, info: union(enum) { } else if (self.bin_file.cast(.coff)) |coff_file| { const atom = try coff_file.getOrCreateAtomForNav(func.owner_nav); const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; - try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }, .{}); + try self.genSetReg(.rax, .usize, .{ .lea_got = sym_index }, .{}); try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(.macho)) |macho_file| { const zo = macho_file.getZigObject().?; @@ -13644,7 +13592,7 @@ fn genCall(self: *CodeGen, info: union(enum) { } } else { assert(self.typeOf(callee).zigTypeTag(zcu) == .pointer); - try self.genSetReg(.rax, Type.usize, .{ .air_ref = callee }, .{}); + try self.genSetReg(.rax, .usize, .{ .air_ref = callee }, .{}); try self.asmRegister(.{ ._, .call }, .rax); }, .lib => |lib| if (self.bin_file.cast(.elf)) |elf_file| { @@ -13676,7 +13624,7 @@ fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg); defer self.register_manager.unlockReg(lock); - try self.genSetReg(reg_off.reg, Type.usize, self.ret_mcv.long, .{}); + try self.genSetReg(reg_off.reg, .usize, self.ret_mcv.long, .{}); try self.genSetMem( .{ .reg = reg_off.reg }, reg_off.off, @@ -13775,7 +13723,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v floatCompilerRtAbiName(float_bits), }) catch unreachable, } }, &.{ ty, ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }); - try self.genBinOpMir(.{ ._, .@"test" }, Type.i32, ret, ret); + try self.genBinOpMir(.{ ._, .@"test" }, .i32, ret, ret); break :result switch (op) { .eq => .e, .neq => .ne, @@ -13938,12 +13886,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v self.register_manager.lockRegAssumeUnused(dst_addr_reg); errdefer self.register_manager.unlockReg(dst_addr_lock); - try self.genSetReg( - dst_addr_reg, - Type.usize, - resolved_dst_mcv.address(), - .{}, - ); + try self.genSetReg(dst_addr_reg, .usize, resolved_dst_mcv.address(), .{}); break :dst .{ .addr_reg = dst_addr_reg, .addr_lock = dst_addr_lock, @@ -14000,12 +13943,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v self.register_manager.lockRegAssumeUnused(src_addr_reg); errdefer self.register_manager.unlockReg(src_addr_lock); - try self.genSetReg( - src_addr_reg, - Type.usize, - resolved_src_mcv.address(), - .{}, - ); + try self.genSetReg(src_addr_reg, 
.usize, resolved_src_mcv.address(), .{}); break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock, @@ -14026,7 +13964,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v const off = limb_i * 8; const tmp_reg = regs[@min(limb_i, 1)].to64(); - try self.genSetReg(tmp_reg, Type.usize, if (dst_info) |info| .{ + try self.genSetReg(tmp_reg, .usize, if (dst_info) |info| .{ .indirect = .{ .reg = info.addr_reg, .off = off }, } else switch (resolved_dst_mcv) { inline .register_pair, @@ -14049,7 +13987,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v try self.genBinOpMir( .{ ._, .xor }, - Type.usize, + .usize, .{ .register = tmp_reg }, if (src_info) |info| .{ .indirect = .{ .reg = info.addr_reg, .off = off }, @@ -14244,7 +14182,7 @@ fn genTry( else try self.isErr(null, operand_ty, operand_mcv); - const reloc = try self.genCondBrMir(Type.anyerror, is_err_mcv); + const reloc = try self.genCondBrMir(.anyerror, is_err_mcv); if (self.liveness.operandDies(inst, 0)) { if (operand.toIndex()) |operand_inst| try self.processDeath(operand_inst); @@ -14406,7 +14344,7 @@ fn isNull(self: *CodeGen, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) const some_info: struct { off: u31, ty: Type } = if (opt_ty.optionalReprIsPayload(zcu)) .{ .off = 0, .ty = if (pl_ty.isSlice(zcu)) pl_ty.slicePtrFieldType(zcu) else pl_ty } else - .{ .off = @intCast(pl_ty.abiSize(zcu)), .ty = Type.bool }; + .{ .off = @intCast(pl_ty.abiSize(zcu)), .ty = .bool }; self.eflags_inst = inst; switch (opt_mcv) { @@ -14481,7 +14419,7 @@ fn isNull(self: *CodeGen, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_reg_lock); - try self.genSetReg(addr_reg, Type.usize, opt_mcv.address(), .{}); + try self.genSetReg(addr_reg, .usize, opt_mcv.address(), .{}); const some_abi_size: u32 = @intCast(some_info.ty.abiSize(zcu)); try self.asmMemoryImmediate( .{ ._, .cmp }, @@ -14536,7 +14474,7 @@ fn isNullPtr(self: *CodeGen, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValu const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(zcu)) .{ .off = 0, .ty = if (pl_ty.isSlice(zcu)) pl_ty.slicePtrFieldType(zcu) else pl_ty } else - .{ .off = @intCast(pl_ty.abiSize(zcu)), .ty = Type.bool }; + .{ .off = @intCast(pl_ty.abiSize(zcu)), .ty = .bool }; const ptr_reg = switch (ptr_mcv) { .register => |reg| reg, @@ -14582,22 +14520,17 @@ fn isErr(self: *CodeGen, maybe_inst: ?Air.Inst.Index, eu_ty: Type, eu_mcv: MCVal .{ ._r, .sh }, eu_ty, .{ .register = tmp_reg }, - Type.u8, + .u8, .{ .immediate = @as(u6, @intCast(err_off * 8)) }, ); } else { - try self.truncateRegister(Type.anyerror, tmp_reg); + try self.truncateRegister(.anyerror, tmp_reg); } - try self.genBinOpMir( - .{ ._, .cmp }, - Type.anyerror, - .{ .register = tmp_reg }, - .{ .immediate = 0 }, - ); + try self.genBinOpMir(.{ ._, .cmp }, .anyerror, .{ .register = tmp_reg }, .{ .immediate = 0 }); }, .load_frame => |frame_addr| try self.genBinOpMir( .{ ._, .cmp }, - Type.anyerror, + .anyerror, .{ .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + err_off, @@ -14633,7 +14566,7 @@ fn isErrPtr(self: *CodeGen, maybe_inst: ?Air.Inst.Index, ptr_ty: Type, ptr_mcv: .{ .base = .{ .reg = ptr_reg }, .mod = .{ .rm = .{ - .size = self.memSize(Type.anyerror), + .size = self.memSize(.anyerror), .disp = err_off, } }, }, @@ -15200,7 +15133,7 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) 
!void { .register, .register_offset, .lea_frame => break :arg ptr_mcv.deref(), else => {}, } - break :arg .{ .indirect = .{ .reg = try self.copyToTmpRegister(Type.usize, ptr_mcv) } }; + break :arg .{ .indirect = .{ .reg = try self.copyToTmpRegister(.usize, ptr_mcv) } }; }; }; if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| { @@ -15262,7 +15195,7 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genCopy(ty, temp_mcv, input_mcv, .{}); break :arg temp_mcv; }; - try self.genSetReg(addr_reg, Type.usize, input_mcv.address(), .{}); + try self.genSetReg(addr_reg, .usize, input_mcv.address(), .{}); break :arg .{ .indirect = .{ .reg = addr_reg } }; } else if (std.mem.eql(u8, constraint, "g") or std.mem.eql(u8, constraint, "rm") or std.mem.eql(u8, constraint, "mr") or @@ -15539,11 +15472,11 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { else return self.fail("invalid modifier: '{s}'", .{modifier}), .lea_got => |sym_index| if (std.mem.eql(u8, modifier, "P")) - .{ .reg = try self.copyToTmpRegister(Type.usize, .{ .lea_got = sym_index }) } + .{ .reg = try self.copyToTmpRegister(.usize, .{ .lea_got = sym_index }) } else return self.fail("invalid modifier: '{s}'", .{modifier}), .lea_symbol => |sym_off| if (std.mem.eql(u8, modifier, "P")) - .{ .reg = try self.copyToTmpRegister(Type.usize, .{ .lea_symbol = sym_off }) } + .{ .reg = try self.copyToTmpRegister(.usize, .{ .lea_symbol = sym_off }) } else return self.fail("invalid modifier: '{s}'", .{modifier}), else => return self.fail("invalid constraint: '{s}'", .{op_str}), @@ -16099,7 +16032,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); errdefer self.register_manager.unlockReg(src_addr_lock); - try self.genSetReg(src_addr_reg, Type.usize, src_mcv.address(), opts); + try self.genSetReg(src_addr_reg, .usize, src_mcv.address(), opts); break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock }; }, .air_ref => |src_ref| return self.genCopy( @@ -16146,7 +16079,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C else => unreachable, } - const addr_reg = try self.copyToTmpRegister(Type.usize, dst_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, dst_mcv.address()); const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); @@ -16445,7 +16378,7 @@ fn genSetReg( else => unreachable, } - const addr_reg = try self.copyToTmpRegister(Type.usize, src_mcv.address()); + const addr_reg = try self.copyToTmpRegister(.usize, src_mcv.address()); const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); @@ -16656,7 +16589,7 @@ fn genSetMem( try self.genSetMem( base, disp + @as(i32, @intCast(child_ty.abiSize(zcu))), - Type.bool, + .bool, .{ .eflags = ro.eflags }, opts, ); @@ -16732,9 +16665,9 @@ fn genSetMem( fn genInlineMemcpy(self: *CodeGen, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void { try self.spillRegisters(&.{ .rsi, .rdi, .rcx }); - try self.genSetReg(.rsi, Type.usize, src_ptr, .{}); - try self.genSetReg(.rdi, Type.usize, dst_ptr, .{}); - try self.genSetReg(.rcx, Type.usize, len, .{}); + try self.genSetReg(.rsi, .usize, src_ptr, .{}); + try self.genSetReg(.rdi, .usize, dst_ptr, .{}); + try self.genSetReg(.rcx, .usize, len, .{}); try self.asmOpOnly(.{ .@"rep _sb", .mov }); } @@ -16746,9 
+16679,9 @@ fn genInlineMemset( opts: CopyOptions, ) InnerError!void { try self.spillRegisters(&.{ .rdi, .al, .rcx }); - try self.genSetReg(.rdi, Type.usize, dst_ptr, .{}); - try self.genSetReg(.al, Type.u8, value, opts); - try self.genSetReg(.rcx, Type.usize, len, .{}); + try self.genSetReg(.rdi, .usize, dst_ptr, .{}); + try self.genSetReg(.al, .u8, value, opts); + try self.genSetReg(.rcx, .usize, len, .{}); try self.asmOpOnly(.{ .@"rep _sb", .sto }); } @@ -16791,10 +16724,10 @@ fn genLazySymbolRef( return self.fail("{s} creating lazy symbol", .{@errorName(err)}); if (self.mod.pic) { switch (tag) { - .lea, .call => try self.genSetReg(reg, Type.usize, .{ + .lea, .call => try self.genSetReg(reg, .usize, .{ .lea_symbol = .{ .sym_index = sym_index }, }, .{}), - .mov => try self.genSetReg(reg, Type.usize, .{ + .mov => try self.genSetReg(reg, .usize, .{ .load_symbol = .{ .sym_index = sym_index }, }, .{}), else => unreachable, @@ -16844,8 +16777,8 @@ fn genLazySymbolRef( return self.fail("{s} creating lazy symbol", .{@errorName(err)}); const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; switch (tag) { - .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }, .{}), - .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }, .{}), + .lea, .call => try self.genSetReg(reg, .usize, .{ .lea_got = sym_index }, .{}), + .mov => try self.genSetReg(reg, .usize, .{ .load_got = sym_index }, .{}), else => unreachable, } switch (tag) { @@ -16859,10 +16792,10 @@ fn genLazySymbolRef( return self.fail("{s} creating lazy symbol", .{@errorName(err)}); const sym = zo.symbols.items[sym_index]; switch (tag) { - .lea, .call => try self.genSetReg(reg, Type.usize, .{ + .lea, .call => try self.genSetReg(reg, .usize, .{ .lea_symbol = .{ .sym_index = sym.nlist_idx }, }, .{}), - .mov => try self.genSetReg(reg, Type.usize, .{ + .mov => try self.genSetReg(reg, .usize, .{ .load_symbol = .{ .sym_index = sym.nlist_idx }, }, .{}), else => unreachable, @@ -16932,7 +16865,7 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void { const bit_size = dst_ty.bitSize(zcu); if (abi_size * 8 <= bit_size or dst_ty.isVector(zcu)) break :result dst_mcv; - const dst_limbs_len = std.math.divCeil(i32, @intCast(bit_size), 64) catch unreachable; + const dst_limbs_len = std.math.divCeil(u31, @intCast(bit_size), 64) catch unreachable; const high_mcv: MCValue = switch (dst_mcv) { .register => |dst_reg| .{ .register = dst_reg }, .register_pair => |dst_regs| .{ .register = dst_regs[1] }, @@ -16941,17 +16874,43 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void { const high_reg = if (high_mcv.isRegister()) high_mcv.getReg().? 
else - try self.copyToTmpRegister(Type.usize, high_mcv); + try self.copyToTmpRegister(.usize, high_mcv); const high_lock = self.register_manager.lockReg(high_reg); defer if (high_lock) |lock| self.register_manager.unlockReg(lock); - try self.truncateRegister(dst_ty, high_reg); if (!high_mcv.isRegister()) try self.genCopy( - if (abi_size <= 8) dst_ty else Type.usize, + if (abi_size <= 8) dst_ty else .usize, high_mcv, .{ .register = high_reg }, .{}, ); + var offset = dst_limbs_len * 8; + if (offset < abi_size) { + const dst_signedness: std.builtin.Signedness = if (dst_ty.isAbiInt(zcu)) + dst_ty.intInfo(zcu).signedness + else + .unsigned; + const ext_mcv: MCValue = ext_mcv: switch (dst_signedness) { + .signed => { + try self.asmRegisterImmediate(.{ ._r, .sa }, high_reg, .u(63)); + break :ext_mcv .{ .register = high_reg }; + }, + .unsigned => .{ .immediate = 0 }, + }; + while (offset < abi_size) : (offset += 8) { + const limb_mcv: MCValue = switch (dst_mcv) { + .register => |dst_reg| .{ .register = dst_reg }, + .register_pair => |dst_regs| .{ .register = dst_regs[@divExact(offset, 8)] }, + else => dst_mcv.address().offset(offset).deref(), + }; + const limb_lock = if (limb_mcv.isRegister()) + self.register_manager.lockReg(limb_mcv.getReg().?) + else + null; + defer if (limb_lock) |lock| self.register_manager.unlockReg(lock); + try self.genCopy(.usize, limb_mcv, ext_mcv, .{}); + } + } break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -16973,7 +16932,7 @@ fn airArrayToSlice(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetMem( .{ .frame = frame_index }, @intCast(ptr_ty.abiSize(zcu)), - Type.usize, + .usize, .{ .immediate = array_len }, .{}, ); @@ -17151,28 +17110,28 @@ fn airCmpxchg(self: *CodeGen, inst: Air.Inst.Index) !void { if (val_abi_size > 8) { const exp_addr_mcv: MCValue = switch (exp_mcv) { .memory, .indirect, .load_frame => exp_mcv.address(), - else => .{ .register = try self.copyToTmpRegister(Type.usize, exp_mcv.address()) }, + else => .{ .register = try self.copyToTmpRegister(.usize, exp_mcv.address()) }, }; const exp_addr_lock = if (exp_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; defer if (exp_addr_lock) |lock| self.register_manager.unlockReg(lock); - try self.genSetReg(.rax, Type.usize, exp_addr_mcv.deref(), .{}); - try self.genSetReg(.rdx, Type.usize, exp_addr_mcv.offset(8).deref(), .{}); + try self.genSetReg(.rax, .usize, exp_addr_mcv.deref(), .{}); + try self.genSetReg(.rdx, .usize, exp_addr_mcv.offset(8).deref(), .{}); } else try self.genSetReg(.rax, val_ty, exp_mcv, .{}); const new_mcv = try self.resolveInst(extra.new_value); const new_reg = if (val_abi_size > 8) new: { const new_addr_mcv: MCValue = switch (new_mcv) { .memory, .indirect, .load_frame => new_mcv.address(), - else => .{ .register = try self.copyToTmpRegister(Type.usize, new_mcv.address()) }, + else => .{ .register = try self.copyToTmpRegister(.usize, new_mcv.address()) }, }; const new_addr_lock = if (new_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; defer if (new_addr_lock) |lock| self.register_manager.unlockReg(lock); - try self.genSetReg(.rbx, Type.usize, new_addr_mcv.deref(), .{}); - try self.genSetReg(.rcx, Type.usize, new_addr_mcv.offset(8).deref(), .{}); + try self.genSetReg(.rbx, .usize, new_addr_mcv.deref(), .{}); + try self.genSetReg(.rcx, .usize, new_addr_mcv.offset(8).deref(), .{}); break :new null; } else try self.copyToTmpRegister(val_ty, new_mcv); const new_lock = if (new_reg) |reg| 
self.register_manager.lockRegAssumeUnused(reg) else null; @@ -17213,9 +17172,9 @@ fn airCmpxchg(self: *CodeGen, inst: Air.Inst.Index) !void { } const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genCopy(Type.usize, dst_mcv, .{ .register = .rax }, .{}); - try self.genCopy(Type.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx }, .{}); - try self.genCopy(Type.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne }, .{}); + try self.genCopy(.usize, dst_mcv, .{ .register = .rax }, .{}); + try self.genCopy(.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx }, .{}); + try self.genCopy(.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne }, .{}); break :result dst_mcv; }; return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value }); @@ -17488,7 +17447,7 @@ fn atomicOp( const val_mem_mcv: MCValue = switch (val_mcv) { .memory, .indirect, .load_frame => val_mcv, else => .{ .indirect = .{ - .reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()), + .reg = try self.copyToTmpRegister(.usize, val_mcv.address()), } }, }; const val_lo_mem = try val_mem_mcv.mem(self, .{ .size = .qword }); @@ -17545,7 +17504,7 @@ fn atomicOp( }, }; - const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .register = .rcx }); + const tmp_reg = try self.copyToTmpRegister(.usize, .{ .register = .rcx }); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); @@ -17719,7 +17678,7 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { const len_lock = self.register_manager.lockRegAssumeUnused(len_reg); defer self.register_manager.unlockReg(len_lock); - try self.genSetReg(len_reg, Type.usize, len, .{}); + try self.genSetReg(len_reg, .usize, len, .{}); try self.asmRegisterRegister(.{ ._, .@"test" }, len_reg, len_reg); const skip_reloc = try self.asmJccReloc(.z, undefined); @@ -17732,12 +17691,12 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); defer self.register_manager.unlockReg(second_elem_ptr_lock); - try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ - .reg = try self.copyToTmpRegister(Type.usize, dst_ptr), + try self.genSetReg(second_elem_ptr_reg, .usize, .{ .register_offset = .{ + .reg = try self.copyToTmpRegister(.usize, dst_ptr), .off = elem_abi_size, } }, .{}); - try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 }); + try self.genBinOpMir(.{ ._, .sub }, .usize, len_mcv, .{ .immediate = 1 }); try self.asmRegisterRegisterImmediate( .{ .i_, .mul }, len_reg, @@ -17763,8 +17722,8 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); defer self.register_manager.unlockReg(second_elem_ptr_lock); - try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ - .reg = try self.copyToTmpRegister(Type.usize, dst), + try self.genSetReg(second_elem_ptr_reg, .usize, .{ .register_offset = .{ + .reg = try self.copyToTmpRegister(.usize, dst), .off = elem_abi_size, } }, .{}); @@ -17886,7 +17845,7 @@ fn airTagName(self: *CodeGen, inst: Air.Inst.Index) !void { const param_regs = abi.getCAbiIntParamRegs(resolved_cc); const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetReg(param_regs[0], Type.usize, dst_mcv.address(), .{}); + try self.genSetReg(param_regs[0], .usize, dst_mcv.address(), .{}); const operand = 
try self.resolveInst(un_op); try self.genSetReg(param_regs[1], enum_ty, operand, .{}); @@ -18415,7 +18374,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { mask_alias, if (pred_mcv.isBase()) try pred_mcv.mem(self, .{ .size = .byte }) else .{ .base = .{ .reg = (try self.copyToTmpRegister( - Type.usize, + .usize, pred_mcv.address(), )).to64() }, .mod = .{ .rm = .{ .size = .byte } }, @@ -18478,7 +18437,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { .storage = .{ .elems = mask_elems[0..vec_len] }, } }))); const mask_mem: Memory = .{ - .base = .{ .reg = try self.copyToTmpRegister(Type.usize, mask_mcv.address()) }, + .base = .{ .reg = try self.copyToTmpRegister(.usize, mask_mcv.address()) }, .mod = .{ .rm = .{ .size = self.memSize(ty) } }, }; if (has_avx) try self.asmRegisterRegisterMemory( @@ -18503,7 +18462,7 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void { .storage = .{ .elems = mask_elems[0..vec_len] }, } }))); const mask_mem: Memory = .{ - .base = .{ .reg = try self.copyToTmpRegister(Type.usize, mask_mcv.address()) }, + .base = .{ .reg = try self.copyToTmpRegister(.usize, mask_mcv.address()) }, .mod = .{ .rm = .{ .size = self.memSize(ty) } }, }; if (has_avx) { @@ -19380,7 +19339,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { .storage = .{ .elems = lhs_mask_elems[0..max_abi_size] }, } }))); const lhs_mask_mem: Memory = .{ - .base = .{ .reg = try self.copyToTmpRegister(Type.usize, lhs_mask_mcv.address()) }, + .base = .{ .reg = try self.copyToTmpRegister(.usize, lhs_mask_mcv.address()) }, .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } }, }; if (has_avx) try self.asmRegisterRegisterMemory( @@ -19414,7 +19373,7 @@ fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void { .storage = .{ .elems = rhs_mask_elems[0..max_abi_size] }, } }))); const rhs_mask_mem: Memory = .{ - .base = .{ .reg = try self.copyToTmpRegister(Type.usize, rhs_mask_mcv.address()) }, + .base = .{ .reg = try self.copyToTmpRegister(.usize, rhs_mask_mcv.address()) }, .mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } }, }; if (has_avx) try self.asmRegisterRegisterMemory( @@ -19634,7 +19593,7 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._l, .sh }, elem_ty, .{ .register = temp_alias }, - Type.u8, + .u8, .{ .immediate = elem_bit_off }, ); try self.genBinOpMir( @@ -19657,7 +19616,7 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._r, .sh }, elem_ty, .{ .register = temp_reg }, - Type.u8, + .u8, .{ .immediate = elem_abi_bits - elem_bit_off }, ); try self.genBinOpMir( @@ -19984,7 +19943,7 @@ fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; const va_list_ty = self.air.instructions.items(.data)[@intFromEnum(inst)].ty; - const ptr_anyopaque_ty = try pt.singleMutPtrType(Type.anyopaque); + const ptr_anyopaque_ty = try pt.singleMutPtrType(.anyopaque); const result: MCValue = switch (abi.resolveCallingConvention( self.fn_type.fnCallingConvention(zcu), @@ -19998,7 +19957,7 @@ fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetMem( .{ .frame = dst_fi }, field_off, - Type.c_uint, + .c_uint, .{ .immediate = info.gp_count * 8 }, .{}, ); @@ -20007,7 +19966,7 @@ fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetMem( .{ .frame = dst_fi }, field_off, - Type.c_uint, + .c_uint, .{ .immediate = abi.SysV.c_abi_int_param_regs.len * 8 + info.fp_count * 16 }, .{}, ); @@ -20044,7 +20003,7 @@ fn airVaArg(self: *CodeGen, 
inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const ty = self.typeOfIndex(inst); const promote_ty = self.promoteVarArg(ty); - const ptr_anyopaque_ty = try pt.singleMutPtrType(Type.anyopaque); + const ptr_anyopaque_ty = try pt.singleMutPtrType(.anyopaque); const unused = self.liveness.isUnused(inst); const result: MCValue = switch (abi.resolveCallingConvention( @@ -20083,7 +20042,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { .integer => { assert(classes.len == 1); - try self.genSetReg(offset_reg, Type.c_uint, gp_offset, .{}); + try self.genSetReg(offset_reg, .c_uint, gp_offset, .{}); try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, .u( abi.SysV.c_abi_int_param_regs.len * 8, )); @@ -20104,7 +20063,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { .disp = 8, } }, }); - try self.genCopy(Type.c_uint, gp_offset, .{ .register = offset_reg }, .{}); + try self.genCopy(.c_uint, gp_offset, .{ .register = offset_reg }, .{}); const done_reloc = try self.asmJmpReloc(undefined); self.performReloc(mem_reloc); @@ -20131,7 +20090,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { .sse => { assert(classes.len == 1); - try self.genSetReg(offset_reg, Type.c_uint, fp_offset, .{}); + try self.genSetReg(offset_reg, .c_uint, fp_offset, .{}); try self.asmRegisterImmediate(.{ ._, .cmp }, offset_reg, .u( abi.SysV.c_abi_int_param_regs.len * 8 + abi.SysV.c_abi_sse_param_regs.len * 16, )); @@ -20152,7 +20111,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { .disp = 16, } }, }); - try self.genCopy(Type.c_uint, fp_offset, .{ .register = offset_reg }, .{}); + try self.genCopy(.c_uint, fp_offset, .{ .register = offset_reg }, .{}); const done_reloc = try self.asmJmpReloc(undefined); self.performReloc(mem_reloc); @@ -20277,7 +20236,7 @@ fn resolveInst(self: *CodeGen, ref: Air.Inst.Ref) InnerError!MCValue { try self.genSetMem( .{ .frame = frame_index }, 0, - Type.usize, + .usize, .{ .lea_symbol = .{ .sym_index = tlv_sym } }, .{}, ); @@ -20318,7 +20277,7 @@ fn limitImmediateType(self: *CodeGen, operand: Air.Inst.Ref, comptime T: type) ! // This immediate is unsigned. const U = std.meta.Int(.unsigned, ti.bits - @intFromBool(ti.signedness == .signed)); if (imm >= std.math.maxInt(U)) { - return MCValue{ .register = try self.copyToTmpRegister(Type.usize, mcv) }; + return MCValue{ .register = try self.copyToTmpRegister(.usize, mcv) }; } }, else => {}, @@ -20764,7 +20723,7 @@ fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Ty if (classes.len == parts_len) for (&parts, classes, 0..) 
|*part, class, part_i| { part.* = switch (class) { .integer => if (part_i < parts_len - 1) - Type.u64 + .u64 else part: { const elem_size = ty.abiAlignment(zcu).minStrict(.@"8").toByteUnits().?; const elem_ty = try pt.intType(.unsigned, @intCast(elem_size * 8)); @@ -20773,9 +20732,9 @@ fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Ty else => |array_len| try pt.arrayType(.{ .len = array_len, .child = elem_ty.toIntern() }), }; }, - .float => Type.f32, + .float => .f32, .float_combine => try pt.arrayType(.{ .len = 2, .child = .f32_type }), - .sse => Type.f64, + .sse => .f64, else => break, }; } else { @@ -20791,7 +20750,7 @@ fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Ty fn truncateRegister(self: *CodeGen, ty: Type, reg: Register) !void { const pt = self.pt; const zcu = pt.zcu; - const int_info = if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else std.builtin.Type.Int{ + const int_info: InternPool.Key.IntType = if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else .{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)), }; @@ -20799,38 +20758,16 @@ fn truncateRegister(self: *CodeGen, ty: Type, reg: Register) !void { try self.spillEflagsIfOccupied(); switch (int_info.signedness) { .signed => { - try self.genShiftBinOpMir( - .{ ._l, .sa }, - Type.isize, - .{ .register = reg }, - Type.u8, - .{ .immediate = shift }, - ); - try self.genShiftBinOpMir( - .{ ._r, .sa }, - Type.isize, - .{ .register = reg }, - Type.u8, - .{ .immediate = shift }, - ); + try self.genShiftBinOpMir(.{ ._l, .sa }, .isize, .{ .register = reg }, .u8, .{ .immediate = shift }); + try self.genShiftBinOpMir(.{ ._r, .sa }, .isize, .{ .register = reg }, .u8, .{ .immediate = shift }); }, .unsigned => { const mask = ~@as(u64, 0) >> shift; if (int_info.bits <= 32) { - try self.genBinOpMir( - .{ ._, .@"and" }, - Type.u32, - .{ .register = reg }, - .{ .immediate = mask }, - ); + try self.genBinOpMir(.{ ._, .@"and" }, .u32, .{ .register = reg }, .{ .immediate = mask }); } else { - const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask }); - try self.genBinOpMir( - .{ ._, .@"and" }, - Type.usize, - .{ .register = reg }, - .{ .register = tmp_reg }, - ); + const tmp_reg = try self.copyToTmpRegister(.usize, .{ .immediate = mask }); + try self.genBinOpMir(.{ ._, .@"and" }, .usize, .{ .register = reg }, .{ .register = tmp_reg }); } }, } @@ -20911,7 +20848,7 @@ fn floatCompilerRtAbiName(float_bits: u32) u8 { fn floatCompilerRtAbiType(self: *CodeGen, ty: Type, other_ty: Type) Type { if (ty.toIntern() == .f16_type and (other_ty.toIntern() == .f32_type or other_ty.toIntern() == .f64_type) and - self.target.isDarwin()) return Type.u16; + self.target.isDarwin()) return .u16; return ty; } @@ -20943,9 +20880,9 @@ fn promoteInt(self: *CodeGen, ty: Type) Type { else => if (ty.isAbiInt(zcu)) ty.intInfo(zcu) else return ty, }; for ([_]Type{ - Type.c_int, Type.c_uint, - Type.c_long, Type.c_ulong, - Type.c_longlong, Type.c_ulonglong, + .c_int, .c_uint, + .c_long, .c_ulong, + .c_longlong, .c_ulonglong, }) |promote_ty| { const promote_info = promote_ty.intInfo(zcu); if (int_info.signedness == .signed and promote_info.signedness == .unsigned) continue; @@ -20958,10 +20895,10 @@ fn promoteInt(self: *CodeGen, ty: Type) Type { fn promoteVarArg(self: *CodeGen, ty: Type) Type { if (!ty.isRuntimeFloat()) return self.promoteInt(ty); switch (ty.floatBits(self.target.*)) { - 32, 64 => return Type.f64, + 32, 64 => return .f64, else => |float_bits| { assert(float_bits == 
self.target.cTypeBitSize(.longdouble)); - return Type.c_longdouble; + return .c_longdouble; }, } } @@ -21036,7 +20973,7 @@ const Temp = struct { fn getOffset(temp: Temp, off: i32, cg: *CodeGen) !Temp { const new_temp_index = cg.next_temp_index; - cg.temp_type[@intFromEnum(new_temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(new_temp_index)] = .usize; cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); switch (temp.tracking(cg).short) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), @@ -21141,7 +21078,7 @@ const Temp = struct { fn getLimb(temp: Temp, limb_index: u28, cg: *CodeGen) !Temp { const new_temp_index = cg.next_temp_index; - cg.temp_type[@intFromEnum(new_temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(new_temp_index)] = .usize; switch (temp.tracking(cg).short) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), .immediate => |imm| { @@ -21220,7 +21157,7 @@ const Temp = struct { else => {}, .register, .lea_symbol, .lea_frame => { assert(limb_index == 0); - cg.temp_type[@intFromEnum(temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(temp_index)] = .usize; return; }, .register_pair => |regs| { @@ -21232,7 +21169,7 @@ const Temp = struct { for (regs, 0..) |reg, reg_index| if (reg_index != limb_index) cg.register_manager.freeReg(reg); temp_tracking.* = .init(.{ .register = regs[limb_index] }); - cg.temp_type[@intFromEnum(temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(temp_index)] = .usize; return; }, .load_symbol => |sym_off| { @@ -21241,7 +21178,7 @@ const Temp = struct { .sym_index = sym_off.sym_index, .off = sym_off.off + @as(u31, limb_index) * 8, } }); - cg.temp_type[@intFromEnum(temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(temp_index)] = .usize; return; }, .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) { @@ -21250,7 +21187,7 @@ const Temp = struct { .index = frame_addr.index, .off = frame_addr.off + @as(u31, limb_index) * 8, } }); - cg.temp_type[@intFromEnum(temp_index)] = Type.usize; + cg.temp_type[@intFromEnum(temp_index)] = .usize; return; }, } @@ -21339,7 +21276,7 @@ const Temp = struct { const result_temp: Temp = .{ .index = result_temp_index.toIndex() }; assert(cg.reuseTemp(result_temp.index, first_temp.index, first_temp_tracking)); assert(cg.reuseTemp(result_temp.index, second_temp.index, second_temp_tracking)); - cg.temp_type[@intFromEnum(result_temp_index)] = Type.slice_const_u8; + cg.temp_type[@intFromEnum(result_temp_index)] = .slice_const_u8; result_temp_index.tracking(cg).* = .init(result); first_temp.* = result_temp; } @@ -21405,7 +21342,7 @@ const Temp = struct { cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg); const new_reg = try cg.register_manager.allocReg(new_temp_index.toIndex(), abi.RegisterClass.gp); - try cg.genSetReg(new_reg, Type.usize, temp_tracking.short.address(), .{}); + try cg.genSetReg(new_reg, .usize, temp_tracking.short.address(), .{}); new_temp_index.tracking(cg).* = .init(.{ .indirect = .{ .reg = new_reg } }); try temp.die(cg); cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); @@ -21430,8 +21367,8 @@ const Temp = struct { } }, .load_frame => |val_frame_addr| { - var val_ptr = try cg.tempFromValue(Type.usize, .{ .lea_frame = val_frame_addr }); - var len = try cg.tempFromValue(Type.usize, .{ .immediate = val_abi_size }); + var val_ptr = try cg.tempFromValue(.usize, .{ .lea_frame = val_frame_addr }); + var len = try cg.tempFromValue(.usize, .{ .immediate = val_abi_size }); try val_ptr.memcpy(ptr, &len, cg); try 
val_ptr.die(cg); try len.die(cg); @@ -21668,11 +21605,12 @@ fn tempFromOperand( inst: Air.Inst.Index, op_index: Liveness.OperandInt, op_ref: Air.Inst.Ref, + ignore_death: bool, ) !Temp { const zcu = cg.pt.zcu; const ip = &zcu.intern_pool; - if (!cg.liveness.operandDies(inst, op_index)) { + if (ignore_death or !cg.liveness.operandDies(inst, op_index)) { if (op_ref.toIndex()) |op_inst| return .{ .index = op_inst }; const val = op_ref.toInterned().?; const gop = try cg.const_tracking.getOrPut(cg.gpa, val); @@ -21693,7 +21631,7 @@ fn tempFromOperand( try cg.genSetMem( .{ .frame = frame_index }, 0, - Type.usize, + .usize, .{ .lea_symbol = .{ .sym_index = tlv_sym } }, .{}, ); @@ -21721,7 +21659,9 @@ fn tempFromOperand( inline fn tempsFromOperands(cg: *CodeGen, inst: Air.Inst.Index, op_refs: anytype) ![op_refs.len]Temp { var temps: [op_refs.len]Temp = undefined; inline for (&temps, 0.., op_refs) |*temp, op_index, op_ref| { - temp.* = try cg.tempFromOperand(inst, op_index, op_ref); + temp.* = try cg.tempFromOperand(inst, op_index, op_ref, inline for (0..op_index) |prev_op_index| { + if (op_ref == op_refs[prev_op_index]) break true; + } else false); } return temps; } @@ -21776,6 +21716,8 @@ const Pattern = struct { implicit: u8, /// repeat another operand explicit: u8, + /// a condition code + cc, /// any general purpose register gpr, /// any 64-bit mmx register @@ -21808,6 +21750,8 @@ const Pattern = struct { ymm_limb, /// a limb stored in memory mem_limb, + /// a limb stored in a condition code + cc_mask_limb, /// a limb stored in a 64-bit mmx register mask mm_mask_limb, /// a limb stored in a 128-bit sse register mask @@ -21829,7 +21773,7 @@ const Pattern = struct { fn matches(op: Op, is_mut: bool, temp: Temp, cg: *CodeGen) bool { switch (op) { - .implicit, .explicit => unreachable, + .implicit, .explicit, .cc, .cc_mask_limb => unreachable, else => {}, // temp is undefined .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => return true, @@ -21837,7 +21781,7 @@ const Pattern = struct { const temp_ty = temp.typeOf(cg); const abi_size = temp_ty.abiSize(cg.pt.zcu); return switch (op) { - .implicit, .explicit, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, + .implicit, .explicit, .cc, .cc_mask_limb, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, .gpr => abi_size <= 8 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .general_purpose, .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and @@ -21861,9 +21805,7 @@ const Pattern = struct { }, .mem, .mem_limb => (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), .gpr_limb => abi_size > 8 and switch (temp.tracking(cg).short) { - inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| { - if (reg.class() != .general_purpose) break false; - } else true, + .register, .register_pair, .register_triple, .register_quadruple => true, else => |mcv| mcv.isMemory(), }, .mm_limb, .mm_mask_limb => abi_size > 8 and switch (temp.tracking(cg).short) { @@ -21897,6 +21839,7 @@ }; }; const SelectOptions = struct { + cc: ?Condition = null, invert_result: bool = false, }; fn select( @@ -21914,6 +21857,7 @@ fn select( limb_reloc: Mir.Inst.Index, limb_offset: Offset, limb_size: ?u8, + shuffle_temp: ?Temp, mask_limb_temp: ?Temp, mask_limb_offset: Offset, mask_limb_offset_lock: ?RegisterLock, @@ -21935,6 +21879,7 @@ fn select( .limb_reloc = undefined, .limb_offset = .unused, .limb_size = null, + .shuffle_temp = null, .mask_limb_temp = null,
.mask_limb_offset = .unused, .mask_limb_offset_lock = null, @@ -21992,12 +21937,23 @@ fn select( if (!ref_src_op.matches(is_mut, src_temp, cg)) continue :patterns; } + for (pattern.ops) |op| switch (op) { + else => {}, + .cc_mask_limb, + .mm_mask_limb, + .xmm_mask_limb, + .ymm_mask_limb, + => if (loop.mask_limb_offset_lock == null and !cg.hasFeature(.bmi2)) { + try cg.register_manager.getKnownReg(.rcx, null); + loop.mask_limb_offset_lock = cg.register_manager.lockKnownRegAssumeUnused(.rcx); + }, + }; while (true) for (src_temps, pattern.ops[dst_temps.len..]) |*src_temp, src_op| { - if (changed: switch (switch (src_op) { + if (switch (switch (src_op) { .implicit, .explicit => |linked_index| pattern.ops[linked_index], else => src_op, }) { - .implicit, .explicit => unreachable, + .implicit, .explicit, .cc, .cc_mask_limb => unreachable, .gpr => try src_temp.toRegClass(.general_purpose, cg), .mm, .mm_mask, .mm_sign_mask => try src_temp.toRegClass(.mmx, cg), .xmm, @@ -22015,23 +21971,22 @@ fn select( .ymm_limb, .mem_limb, => switch (src_temp.tracking(cg).short) { - .register_pair, .register_triple, .register_quadruple => false, + .register, .register_pair, .register_triple, .register_quadruple => false, else => try src_temp.toBase(cg), }, - .mm_mask_limb, - .xmm_mask_limb, - .ymm_mask_limb, - => if (!cg.hasFeature(.bmi2) and !cg.register_manager.isKnownRegFree(.rcx)) { - try cg.register_manager.getKnownReg(.rcx, null); - loop.mask_limb_offset_lock = cg.register_manager.lockKnownRegAssumeUnused(.rcx); - break :changed true; - } else false, + .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => false, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => false, }) break; } else break; + const invert_result = opts.invert_result != pattern_set.invert_result; var dst_is_linked: std.StaticBitSet(4) = .initEmpty(); - var mir_ops_len = dst_temps.len; + var mir_ops_len: usize = 0; + for (pattern.ops[0..dst_temps.len]) |dst_op| switch (dst_op) { + else => mir_ops_len += 1, + .cc, .cc_mask_limb => {}, + }; + const dst_mir_ops_len = mir_ops_len; for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { defer mir_ops_len += @intFromBool(src_op != .implicit); const linked_src_op, const extra_temp = op: switch (src_op) { @@ -22046,13 +22001,17 @@ fn select( }; const limb_size: u8, const rc = switch (linked_src_op) { else => continue, - .gpr_limb => .{ 8, abi.RegisterClass.gp }, + .gpr_limb => .{ @intCast(@divExact(@as(Memory.Size, switch (pattern_set.scalar) { + .any => .qword, + .bool => unreachable, + .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, + }).bitSize(), 8)), abi.RegisterClass.gp }, .mm_limb, .mm_mask_limb => .{ 8, @panic("TODO") }, .xmm_limb, .xmm_mask_limb => .{ 16, abi.RegisterClass.sse }, .ymm_limb, .ymm_mask_limb => .{ 32, abi.RegisterClass.sse }, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => { assert(extra_temp.* == null); - extra_temp.* = try cg.tempAllocReg(Type.usize, switch (linked_src_op) { + extra_temp.* = try cg.tempAllocReg(.noreturn, switch (linked_src_op) { else => unreachable, .umax_gpr => abi.RegisterClass.gp, .umax_mm => @panic("TODO"), @@ -22064,10 +22023,18 @@ fn select( assert(loop.limb_size == null or loop.limb_size == limb_size); loop.limb_size = limb_size; loop.remaining_size = loop.remaining_size orelse src_temp.typeOf(cg).abiSize(cg.pt.zcu); - switch (src_temp.tracking(cg).short) { - .register_pair, .register_triple, .register_quadruple => switch (loop.limb_offset) { - .unused, .temp => loop.limb_offset = .{ .known = 0 }, - .known => {}, + const 
src_mcv = src_temp.tracking(cg).short; + switch (src_mcv) { + .register, .register_pair, .register_triple, .register_quadruple => { + switch (loop.limb_offset) { + .unused, .temp => loop.limb_offset = .{ .known = 0 }, + .known => {}, + } + if (!rc.isSet(RegisterManager.indexOfRegIntoTracked(src_mcv.getRegs()[0]).?)) { + if (loop.shuffle_temp == null) loop.shuffle_temp = try cg.tempAllocReg(.noreturn, abi.RegisterClass.sse); + assert(extra_temp.* == null); + extra_temp.* = try cg.tempAllocReg(.usize, rc); + } }, else => { switch (loop.limb_offset) { @@ -22075,7 +22042,7 @@ fn select( .known, .temp => {}, } assert(extra_temp.* == null); - extra_temp.* = try cg.tempAllocReg(Type.usize, rc); + extra_temp.* = try cg.tempAllocReg(.usize, rc); }, } } @@ -22090,13 +22057,17 @@ fn select( dst_temp.* = dst_temp: switch (dst_op) { .implicit => unreachable, .explicit => |linked_index| dst_temps[linked_index], + .cc => try cg.tempFromValue(.bool, .{ .eflags = switch (invert_result) { + false => opts.cc.?, + true => opts.cc.?.negate(), + } }), .gpr => try cg.tempAllocReg(dst_ty, abi.RegisterClass.gp), .mm, .mm_mask, .mm_sign_mask => @panic("TODO"), .xmm, .xmm_mask, .xmm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse), .ymm, .ymm_mask, .ymm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse), .mem => @panic("TODO"), .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => { - if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(Type.usize, switch (dst_op) { + if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(.noreturn, switch (dst_op) { else => unreachable, .gpr_limb => abi.RegisterClass.gp, .mm_limb => @panic("TODO"), @@ -22105,11 +22076,11 @@ fn select( break :dst_temp try cg.tempAlloc(dst_ty); }, .mem_limb => try cg.tempAlloc(dst_ty), - .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => unreachable, // already checked + .cc_mask_limb, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => unreachable, // already checked .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination }; }, - .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { + .cc_mask_limb, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { const scalar_size = @divExact(switch (pattern_set.scalar) { .any, .bool => unreachable, .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, @@ -22131,14 +22102,17 @@ fn select( assert(loop.mask_store_bit_size == null or loop.mask_store_bit_size == mask_store_bit_size); loop.mask_store_bit_size = mask_store_bit_size; loop.mask_limb_offset = loop.limb_offset; - if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(Type.usize, switch (dst_op) { - else => unreachable, - .mm_mask_limb => @panic("TODO"), - .xmm_mask_limb, .ymm_mask_limb => abi.RegisterClass.sse, - }); - if (loop.mask_limb_temp == null) loop.mask_limb_temp = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + if (loop.mask_limb_temp == null) { + loop.mask_limb_temp = try cg.tempAllocReg(.usize, abi.RegisterClass.gp); + if (dst_op == .cc_mask_limb and mask_store_bit_size > 8) { + // setcc only clears 8 bits + const mask_limb_alias = loop.mask_limb_temp.?.tracking(cg).short.register.to32(); + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegister(.{ ._, .xor }, mask_limb_alias, mask_limb_alias); + } + } if (mask_limb_bit_size < mask_store_bit_size and loop.mask_store_reg == null) { - loop.mask_store_temp = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + loop.mask_store_temp = try cg.tempAllocReg(.usize, 
abi.RegisterClass.gp); loop.mask_store_reg = loop.mask_store_temp.?.tracking(cg).short.register; } dst_temp.* = if (mask_store_bit_size < mask_bit_size) @@ -22152,28 +22126,26 @@ fn select( switch (loop.mask_limb_offset) { .unused, .known => {}, .temp => |*mask_limb_offset| { - if (cg.hasFeature(.bmi2)) { - assert(loop.mask_limb_offset_lock == null); - mask_limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); - } else { - if (loop.mask_limb_offset_lock) |lock| cg.register_manager.unlockReg(lock); - loop.mask_limb_offset_lock = null; - mask_limb_offset.* = try cg.tempFromValue(Type.usize, .{ .register = .rcx }); - } + mask_limb_offset.* = if (cg.hasFeature(.bmi2)) + try cg.tempAllocReg(.usize, abi.RegisterClass.gp) + else if (loop.mask_limb_offset_lock != null) + try cg.tempFromValue(.usize, .{ .register = .rcx }) + else + unreachable; if (loop.mask_store_reg) |mask_store_reg| { - const mask_store_alias = registerAlias( - mask_store_reg, - @min(std.math.divCeil(u7, loop.mask_store_bit_size.?, 8) catch unreachable, 4), - ); + const mask_store_alias = + if (loop.mask_store_bit_size.? > 8) mask_store_reg.to32() else mask_store_reg.to8(); try cg.spillEflagsIfOccupied(); try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); } }, } + if (loop.mask_limb_offset_lock) |lock| cg.register_manager.unlockReg(lock); + loop.mask_limb_offset_lock = null; switch (loop.element_offset) { .unused, .known => {}, .temp => |*element_offset| { - element_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp); + element_offset.* = try cg.tempAllocReg(.usize, abi.RegisterClass.gp); const element_offset_reg = element_offset.tracking(cg).short.register; try cg.spillEflagsIfOccupied(); try cg.asmRegisterRegister(.{ ._, .xor }, element_offset_reg.to32(), element_offset_reg.to32()); @@ -22182,7 +22154,7 @@ fn select( } switch (loop.limb_offset) { .unused, .known => {}, - .temp => |*limb_offset| limb_offset.* = try cg.tempAllocReg(Type.usize, abi.RegisterClass.gp), + .temp => |*limb_offset| limb_offset.* = try cg.tempAllocReg(.usize, abi.RegisterClass.gp), } while (true) { switch (loop.mask_limb_offset) { @@ -22204,7 +22176,7 @@ fn select( } while (true) { var mir_ops: [4]Operand = @splat(.none); - mir_ops_len = dst_temps.len; + mir_ops_len = dst_mir_ops_len; for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { defer mir_ops_len += @intFromBool(src_op != .implicit); const mir_op, const linked_src_op, const extra_temp = switch (src_op) { @@ -22217,35 +22189,146 @@ fn select( // src_temp is undefined .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => extra_temp.?, }.tracking(cg).short; - copy_limb: switch (src_mcv) { - .register_pair, .register_triple, .register_quadruple => {}, - else => try cg.asmRegisterMemory( - switch (linked_src_op) { - else => break :copy_limb, - .gpr_limb => .{ ._, .mov }, - .mm_limb, .mm_mask_limb => .{ ._q, .mov }, - .xmm_limb, - .ymm_limb, - .xmm_mask_limb, - .ymm_mask_limb, - => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, + switch (linked_src_op) { + else => {}, + .gpr_limb, + .mm_limb, + .xmm_limb, + .ymm_limb, + .mm_mask_limb, + .xmm_mask_limb, + .ymm_mask_limb, + => if (extra_temp) |limb_temp| switch (src_mcv) { + .register, .register_pair, .register_triple, .register_quadruple => { + const limb_reg = registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?); + const src_regs = src_mcv.getRegs(); + const src_reg_size: u32 = @intCast(switch (src_mcv) { + .register => src_temp.typeOf(cg).abiSize(cg.pt.zcu), + else 
=> @divExact(src_regs[0].bitSize(), 8), + }); + const src_reg = src_regs[loop.limb_offset.known / src_reg_size]; + assert(src_mcv == .register or src_reg.bitSize() == 8 * src_reg_size); + switch (src_reg.class()) { + else => unreachable, + .general_purpose => try cg.asmRegisterRegister( + .{ ._, .mov }, + limb_reg, + registerAlias(src_reg, src_reg_size), + ), + .sse => { + assert(src_reg_size == 16); + const limb_alias_size = @max(loop.limb_size.?, 4); + const limb_alias = registerAlias(limb_reg, limb_alias_size); + const src_reg_offset = loop.limb_offset.known % src_reg_size; + switch (limb_reg_offset: { + extr: { + const limb_size = if (cg.hasFeature(.sse4_1)) loop.limb_size.? else 2; + if (loop.limb_size.? > limb_size) break :extr; + const limb_offset = src_reg_offset / limb_size; + if (limb_offset == 0) break :extr; + try cg.asmRegisterRegisterImmediate(.{ switch (limb_size) { + else => unreachable, + 1 => if (cg.hasFeature(.avx)) .vp_b else .p_b, + 2 => if (cg.hasFeature(.avx)) .vp_w else .p_w, + 4 => if (cg.hasFeature(.avx)) .vp_d else .p_d, + 8 => if (cg.hasFeature(.avx)) .vp_q else .p_q, + }, .extr }, limb_alias, src_reg.to128(), .u(limb_offset)); + break :limb_reg_offset src_reg_offset % limb_size; + } + try cg.asmRegisterRegister( + .{ switch (limb_alias_size) { + else => unreachable, + 4 => ._d, + 8 => ._q, + }, .mov }, + limb_alias, + if (src_reg_offset < limb_alias_size) src_reg.to128() else shuffle_reg: { + const shuffle_reg = loop.shuffle_temp.?.tracking(cg).short.register.to128(); + const mir_fixes: Mir.Inst.Fixes = if (cg.hasFeature(.sse2)) + if (src_temp.typeOf(cg).scalarType(cg.pt.zcu).isRuntimeFloat()) switch (limb_alias_size) { + else => unreachable, + 4 => if (cg.hasFeature(.avx)) .v_ps else ._ps, + 8 => if (cg.hasFeature(.avx)) .v_pd else ._pd, + } else if (cg.hasFeature(.avx)) .vp_d else .p_d + else + ._ps; + try cg.asmRegisterRegisterImmediate( + .{ mir_fixes, .shuf }, + shuffle_reg, + src_reg: switch (mir_fixes) { + else => unreachable, + ._ps, ._pd => { + try cg.asmRegisterRegister(.{ mir_fixes, .mova }, shuffle_reg, src_reg.to128()); + break :src_reg shuffle_reg; + }, + .p_d => src_reg.to128(), + }, + .u(switch (mir_fixes) { + else => unreachable, + .v_ps, ._ps, .vp_d, .p_d => switch (limb_alias_size) { + else => unreachable, + 4 => switch (src_reg_offset) { + else => unreachable, + 4...7 => 0b01_01_01_01, + 8...11 => 0b10_10_10_10, + 12...15 => 0b11_11_11_11, + }, + 8 => switch (src_reg_offset) { + else => unreachable, + 8...15 => 0b11_10_11_10, + }, + }, + .v_pd, ._pd => switch (limb_alias_size) { + else => unreachable, + 8 => switch (src_reg_offset) { + else => unreachable, + 8...15 => 0b1_1, + }, + }, + }), + ); + break :shuffle_reg shuffle_reg; + }, + ); + break :limb_reg_offset src_reg_offset % limb_alias_size; + }) { + 0 => {}, + else => |limb_reg_offset| { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate(.{ ._r, .sh }, limb_alias, .u(limb_reg_offset * 8)); + }, + } + }, + } }, - registerAlias(extra_temp.?.tracking(cg).short.register, loop.limb_size.?), - try src_mcv.mem(cg, switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .disp = limb_offset, + else => try cg.asmRegisterMemory( + switch (linked_src_op) { + else => unreachable, + .gpr_limb => .{ ._, .mov }, + .mm_limb, .mm_mask_limb => .{ ._q, .mov }, + .xmm_limb, + .ymm_limb, + .xmm_mask_limb, + .ymm_mask_limb, + => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, }, - .temp => |limb_offset| .{ - .size = 
.fromSize(loop.limb_size.?), - .index = limb_offset.tracking(cg).short.register.to64(), - }, - }), - ), + registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?), + try src_mcv.mem(cg, switch (loop.limb_offset) { + .unused => unreachable, + .known => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .disp = limb_offset, + }, + .temp => |limb_offset| .{ + .size = .fromSize(loop.limb_size.?), + .index = limb_offset.tracking(cg).short.register.to64(), + }, + }), + ), + }, } mir_op.* = switch (linked_src_op) { - .implicit, .explicit => unreachable, + .implicit, .explicit, .cc, .cc_mask_limb => unreachable, .gpr => .{ .reg = registerAlias( src_mcv.register, @intCast(src_temp.typeOf(cg).abiSize(cg.pt.zcu)), @@ -22262,25 +22345,12 @@ fn select( .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb, - => switch (src_mcv) { - inline .register_pair, - .register_triple, - .register_quadruple, - => |src_regs| switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| .{ .reg = registerAlias( - src_regs[@divExact(limb_offset, loop.limb_size.?)], - loop.limb_size.?, - ) }, - .temp => unreachable, - }, - else => .{ .reg = registerAlias( - extra_temp.?.tracking(cg).short.register, - loop.limb_size.?, - ) }, - }, + => .{ .reg = registerAlias(if (extra_temp) |limb_temp| + limb_temp.tracking(cg).short.register + else + src_mcv.getRegs()[@divExact(loop.limb_offset.known, loop.limb_size.?)], loop.limb_size.?) }, .mem_limb => .{ .mem = switch (src_mcv) { - .register_pair, .register_triple, .register_quadruple => unreachable, + .register, .register_pair, .register_triple, .register_quadruple => unreachable, else => switch (loop.limb_offset) { .unused => unreachable, .known => |limb_offset| try src_mcv.mem(cg, .{ @@ -22316,15 +22386,15 @@ fn select( } } for ( - mir_ops[0..dst_temps.len], - pattern.ops[0..dst_temps.len], - dst_temps, - dst_tys, - extra_temps[0..dst_temps.len], + mir_ops[0..dst_mir_ops_len], + pattern.ops[0..dst_mir_ops_len], + dst_temps[0..dst_mir_ops_len], + dst_tys[0..dst_mir_ops_len], + extra_temps[0..dst_mir_ops_len], ) |*mir_op, dst_op, dst_temp, dst_ty, extra_temp| { if (mir_op.* != .none) continue; mir_op.* = switch (dst_op) { - .implicit => unreachable, + .implicit, .cc, .cc_mask_limb => unreachable, .explicit => |linked_index| mir_ops[linked_index], .gpr => .{ .reg = registerAlias( dst_temp.tracking(cg).short.register, @@ -22334,7 +22404,14 @@ fn select( .xmm, .xmm_mask, .xmm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to128() }, .ymm, .ymm_mask, .ymm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to256() }, .mem => @panic("TODO"), - .gpr_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to64() }, + .gpr_limb => .{ .reg = registerAlias( + extra_temp.?.tracking(cg).short.register, + @intCast(@divExact(@as(Memory.Size, switch (pattern_set.scalar) { + .any => .qword, + .bool => unreachable, + .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, + }).bitSize(), 8)), + ) }, .mm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register }, .xmm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() }, .ymm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() }, @@ -22376,7 +22453,6 @@ fn select( }, else => |e| return e, }; - const invert_result = opts.invert_result != pattern_set.invert_result; for ( extra_temps[0..dst_temps.len], pattern.ops[0..dst_temps.len], @@ -22410,21 +22486,15 @@ fn select( .any, .bool => unreachable, .float, .any_int, .signed_int, .unsigned_int, 
.any_float_or_int => |size| size, }, cg), - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => if (extra_temp) |limb_temp| - switch (dst_temp.tracking(cg).short) { - inline .register_pair, - .register_triple, - .register_quadruple, - => |dst_regs| switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| try cg.asmRegisterRegister( - .{ ._, .mov }, - dst_regs[@divExact(limb_offset, loop.limb_size.?)].to64(), - limb_temp.tracking(cg).short.register.to64(), - ), - .temp => unreachable, - }, - else => |dst_mcv| try cg.asmMemoryRegister( + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => if (extra_temp) |limb_temp| { + const dst_mcv = dst_temp.tracking(cg).short; + switch (dst_mcv) { + .register_pair, .register_triple, .register_quadruple => try cg.asmRegisterRegister( + .{ ._, .mov }, + dst_mcv.getRegs()[@divExact(loop.limb_offset.known, loop.limb_size.?)].to64(), + limb_temp.tracking(cg).short.register.to64(), + ), + else => try cg.asmMemoryRegister( switch (dst_op) { else => unreachable, .gpr_limb => .{ ._, .mov }, @@ -22444,57 +22514,66 @@ fn select( }), registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?), ), - }, - .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { + } + }, + .cc_mask_limb, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { const scalar_size = switch (pattern_set.scalar) { .any, .bool => unreachable, .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, }; - switch (scalar_size) { - else => {}, - .word => if (cg.hasFeature(.avx)) try cg.asmRegisterRegisterRegister( - .{ .vp_b, .ackssw }, - mir_op.reg, - mir_op.reg, - mir_op.reg, - ) else try cg.asmRegisterRegister( - .{ .p_b, .ackssw }, - mir_op.reg, - mir_op.reg, - ), - } const mask_store_size: u4 = @intCast(std.math.divCeil(u7, loop.mask_store_bit_size.?, 8) catch unreachable); - const mask_limb_reg = registerAlias( - loop.mask_limb_temp.?.tracking(cg).short.register, - mask_store_size, - ); - try cg.asmRegisterRegister(switch (scalar_size) { + const known_shl_count = if (loop.mask_store_reg) |_| switch (loop.mask_limb_offset) { + .unused => unreachable, + .known => |mask_limb_offset| mask_limb_offset & (loop.mask_store_bit_size.? 
- 1), + .temp => null, + } else null; + const mask_limb_reg = registerAlias(if (known_shl_count != 0) + loop.mask_limb_temp.?.tracking(cg).short.register + else + loop.mask_store_reg.?, mask_store_size); + switch (dst_op) { else => unreachable, - .byte, .word => .{ if (cg.hasFeature(.avx)) .vp_b else .p_b, .movmsk }, - .dword => .{ if (cg.hasFeature(.avx)) .v_ps else ._ps, .movmsk }, - .qword => .{ if (cg.hasFeature(.avx)) .v_pd else ._pd, .movmsk }, - }, mask_limb_reg.to32(), mir_op.reg); - if (invert_result) if (loop.mask_store_reg) |_| { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate( - .{ ._, .xor }, - registerAlias(mask_limb_reg, @min(mask_store_size, 4)), - .u((@as(u32, 1) << @intCast(loop.mask_limb_bit_size.?)) - 1), - ); - } else try cg.asmRegister(.{ ._, .not }, mask_limb_reg); + .cc_mask_limb => try cg.asmSetccRegister(switch (invert_result) { + false => opts.cc.?, + true => opts.cc.?.negate(), + }, mask_limb_reg.to8()), + .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { + if (scalar_size == .word) if (cg.hasFeature(.avx)) try cg.asmRegisterRegisterRegister( + .{ .vp_b, .ackssw }, + mir_op.reg, + mir_op.reg, + mir_op.reg, + ) else try cg.asmRegisterRegister( + .{ .p_b, .ackssw }, + mir_op.reg, + mir_op.reg, + ); + try cg.asmRegisterRegister(switch (scalar_size) { + else => unreachable, + .byte, .word => .{ if (cg.hasFeature(.avx)) .vp_b else .p_b, .movmsk }, + .dword => .{ if (cg.hasFeature(.avx)) .v_ps else ._ps, .movmsk }, + .qword => .{ if (cg.hasFeature(.avx)) .v_pd else ._pd, .movmsk }, + }, mask_limb_reg.to32(), mir_op.reg); + if (invert_result) if (loop.mask_store_reg) |_| { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( + .{ ._, .xor }, + registerAlias(mask_limb_reg, @min(mask_store_size, 4)), + .u((@as(u32, 1) << @intCast(loop.mask_limb_bit_size.?)) - 1), + ); + } else try cg.asmRegister(.{ ._, .not }, mask_limb_reg); + }, + } if (loop.mask_store_reg) |mask_store_reg| { const mask_store_alias = registerAlias(mask_store_reg, mask_store_size); switch (loop.mask_limb_offset) { .unused => unreachable, - .known => |mask_limb_offset| switch (mask_limb_offset & (loop.mask_store_bit_size.? - 1)) { - 0 => try cg.asmRegisterRegister(.{ ._, .mov }, mask_store_alias, mask_limb_reg), - else => |shl_count| { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate(.{ ._l, .sh }, mask_limb_reg, .u(shl_count)); - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg); - }, + .known => if (known_shl_count.? != 0) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate(.{ ._l, .sh }, mask_limb_reg, .u(known_shl_count.?)); + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg); }, .temp => |mask_limb_offset| { if (cg.hasFeature(.bmi2)) { @@ -22524,23 +22603,23 @@ fn select( .unused => unreachable, .known => |*mask_limb_offset| { mask_limb_offset.* += loop.mask_limb_bit_size.?; - if (mask_limb_offset.* & (loop.mask_store_bit_size.? - 1) == 0) { - switch (dst_mcv) { - .register => {}, - else => try cg.asmMemoryRegister( + if (mask_limb_offset.* & (loop.mask_store_bit_size.? 
- 1) == 0) switch (dst_mcv) { + .register => {}, + else => { + try cg.asmMemoryRegister( .{ ._, .mov }, try dst_mcv.mem(cg, .{ .size = .fromSize(mask_store_size), .disp = @divExact(mask_limb_offset.*, 8) - mask_store_size, }), registerAlias(loop.mask_store_reg orelse mask_limb_reg, mask_store_size), - ), - } - if (loop.mask_store_reg) |mask_store_reg| { - const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4)); - try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); - } - } + ); + if (loop.mask_store_reg) |mask_store_reg| { + const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4)); + try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); + } + }, + }; }, .temp => |mask_limb_offset| { const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32(); @@ -22641,6 +22720,7 @@ fn select( }, } } + if (loop.shuffle_temp) |shuffle_temp| try shuffle_temp.die(cg); if (loop.mask_limb_temp) |mask_limb_temp| try mask_limb_temp.die(cg); if (loop.mask_store_temp) |mask_store_temp| try mask_store_temp.die(cg); switch (loop.mask_limb_offset) { diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index cf03b9729f..81467de515 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -677,11 +677,11 @@ pub const Instruction = struct { else => unreachable, }, .frame => if (@TypeOf(encoder).options.allow_frame_locs) { - try encoder.modRm_indirectDisp32(operand_enc, undefined); + try encoder.modRm_indirectDisp32(operand_enc, 0); try encoder.disp32(undefined); } else return error.CannotEncode, .reloc => if (@TypeOf(encoder).options.allow_symbols) { - try encoder.modRm_indirectDisp32(operand_enc, undefined); + try encoder.modRm_indirectDisp32(operand_enc, 0); try encoder.disp32(undefined); } else return error.CannotEncode, }, diff --git a/src/codegen/c/Type.zig b/src/codegen/c/Type.zig index 2c2db8b506..20ba8fe6c0 100644 --- a/src/codegen/c/Type.zig +++ b/src/codegen/c/Type.zig @@ -1312,10 +1312,10 @@ pub const Pool = struct { }, else => { const target = &mod.resolved_target.result; - const abi_align = Type.intAbiAlignment(int_info.bits, target.*, false); + const abi_align = Type.intAbiAlignment(int_info.bits, target.*); const abi_align_bytes = abi_align.toByteUnits().?; const array_ctype = try pool.getArray(allocator, .{ - .len = @divExact(Type.intAbiSize(int_info.bits, target.*, false), abi_align_bytes), + .len = @divExact(Type.intAbiSize(int_info.bits, target.*), abi_align_bytes), .elem_ctype = try pool.fromIntInfo(allocator, .{ .signedness = .unsigned, .bits = @intCast(abi_align_bytes * 8), @@ -1429,7 +1429,7 @@ pub const Pool = struct { .name = .{ .index = .len }, .ctype = CType.usize, .alignas = AlignAs.fromAbiAlignment( - Type.intAbiAlignment(target.ptrBitWidth(), target.*, false), + Type.intAbiAlignment(target.ptrBitWidth(), target.*), ), }, }; @@ -1524,7 +1524,7 @@ pub const Pool = struct { .name = .{ .index = .len }, .ctype = CType.usize, .alignas = AlignAs.fromAbiAlignment( - Type.intAbiAlignment(target.ptrBitWidth(), target.*, false), + Type.intAbiAlignment(target.ptrBitWidth(), target.*), ), }, }; @@ -1644,7 +1644,7 @@ pub const Pool = struct { .name = .{ .index = .@"error" }, .ctype = error_set_ctype, .alignas = AlignAs.fromAbiAlignment( - Type.intAbiAlignment(error_set_bits, target.*, false), + Type.intAbiAlignment(error_set_bits, target.*), ), }, .{ diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 99403db78c..eedbe5a660 100644 --- 
a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -581,7 +581,7 @@ const DataLayoutBuilder = struct { switch (kind) { .integer => { if (self.target.ptrBitWidth() <= 16 and size >= 128) return; - abi = @min(abi, Type.maxIntAlignment(self.target, true) * 8); + abi = @min(abi, Type.maxIntAlignment(self.target) * 8); switch (self.target.cpu.arch) { .aarch64, .aarch64_be, diff --git a/src/link/C.zig b/src/link/C.zig index 6c7b7c8975..8a43845639 100644 --- a/src/link/C.zig +++ b/src/link/C.zig @@ -396,7 +396,7 @@ fn abiDefines(self: *C, target: std.Target) !std.ArrayList(u8) { else => {}, } try writer.print("#define ZIG_TARGET_MAX_INT_ALIGNMENT {d}\n", .{ - Type.maxIntAlignment(target, false), + Type.maxIntAlignment(target), }); return defines; } diff --git a/test/behavior/align.zig b/test/behavior/align.zig index d360a99aa3..e1b8e3a18f 100644 --- a/test/behavior/align.zig +++ b/test/behavior/align.zig @@ -144,31 +144,17 @@ test "alignment and size of structs with 128-bit fields" { }, }, - .x86_64 => switch (builtin.zig_backend) { - .stage2_x86_64 => .{ - .a_align = 8, - .a_size = 16, + .x86_64 => .{ + .a_align = 16, + .a_size = 16, - .b_align = 16, - .b_size = 32, + .b_align = 16, + .b_size = 32, - .u128_align = 8, - .u128_size = 16, - .u129_align = 8, - .u129_size = 24, - }, - else => .{ - .a_align = 16, - .a_size = 16, - - .b_align = 16, - .b_size = 32, - - .u128_align = 16, - .u128_size = 16, - .u129_align = 16, - .u129_size = 32, - }, + .u128_align = 16, + .u128_size = 16, + .u129_align = 16, + .u129_size = 32, }, .x86, diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index 900e5747a8..c69917fc05 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -31,7 +31,37 @@ fn testBinary(comptime op: anytype) !void { try testType(u32, 0x80d7a2c6, 0xbff6a402); try testType(u64, 0x71138bc6b4a38898, 0x1bc4043de9438c7b); try testType(u128, 0xe05fc132ef2cd8affee00a907f0a851f, 0x29f912a72cfc6a7c6973426a9636da9a); + try testType( + u256, + 0xb7935f5c2f3b1ae7a422c0a7c446884294b7d5370bada307d2fe5a4c4284a999, + 0x310e6e196ba4f143b8d285ca6addf7f3bb3344224aff221b27607a31e148be08, + ); + try testType( + u512, + 0xe5b1fedca3c77db765e517aabd05ffc524a3a8aff1784bbf67c45b894447ede32b65b9940e78173c591e56e078932d465f235aece7ad47b7f229df7ba8f12295, + 0x8b4bb7c2969e3b121cc1082c442f8b4330f0a50058438fed56447175bb10178607ecfe425cb54dacc25ef26810f3e04681de1844f1aa8d029aca75d658634806, + ); + try testType(@Vector(1, u8), .{ + 0x1f, + }, .{ + 0x06, + }); + try testType(@Vector(2, u8), .{ + 0x80, 0x63, + }, .{ + 0xe4, 0x28, + }); + try testType(@Vector(4, u8), .{ + 0x83, 0x9e, 0x1e, 0xc1, + }, .{ + 0xf0, 0x5c, 0x46, 0x85, + }); + try testType(@Vector(8, u8), .{ + 0x1e, 0x4d, 0x9d, 0x2a, 0x4c, 0x74, 0x0a, 0x83, + }, .{ + 0x28, 0x60, 0xa9, 0xb5, 0xd9, 0xa6, 0xf1, 0xb6, + }); try testType(@Vector(16, u8), .{ 0xea, 0x80, 0xbb, 0xe8, 0x74, 0x81, 0xc8, 0x66, 0x7b, 0x41, 0x90, 0xcb, 0x30, 0x70, 0x4b, 0x0f, }, .{ @@ -75,6 +105,21 @@ fn testBinary(comptime op: anytype) !void { 0x56, 0x4f, 0xf1, 0xaa, 0x0a, 0x0f, 0xdb, 0x1b, 0xc8, 0x45, 0x9b, 0x12, 0xb4, 0x1a, 0xe4, 0xa3, }); + try testType(@Vector(1, u16), .{ + 0x9d6f, + }, .{ + 0x44b1, + }); + try testType(@Vector(2, u16), .{ + 0xa0fa, 0xc365, + }, .{ + 0xe736, 0xc394, + }); + try testType(@Vector(4, u16), .{ + 0x9608, 0xa558, 0x161b, 0x206f, + }, .{ + 0x3088, 0xf25c, 0x7837, 0x9b3f, + }); try testType(@Vector(8, u16), .{ 0xcf61, 0xb121, 0x3cf1, 0x3e9f, 0x43a7, 0x8d69, 0x96f5, 0xc11e, }, .{ @@ -118,6 +163,16 @@ fn 
testBinary(comptime op: anytype) !void { 0x2c02, 0xff5b, 0x19ca, 0xbbf5, 0x870e, 0xc9ca, 0x47bb, 0xcfcc, }); + try testType(@Vector(1, u32), .{ + 0x1d0d9cc4, + }, .{ + 0xce2d0ab6, + }); + try testType(@Vector(2, u32), .{ + 0x5ab78c03, 0xd21bb513, + }, .{ + 0x8a6664eb, 0x79eac37d, + }); try testType(@Vector(4, u32), .{ 0x234d576e, 0x4151cc9c, 0x39f558e4, 0xba935a32, }, .{ @@ -161,9 +216,11 @@ fn testBinary(comptime op: anytype) !void { 0xf080e943, 0xc8718d14, 0x3f920382, 0x18d101b5, }); - // TODO: implement fallback for pcmpeqq - if (!comptime @import("std").Target.x86.featureSetHas(@import("builtin").cpu.features, .sse4_1)) return; - + try testType(@Vector(1, u64), .{ + 0x333f593bf9d08546, + }, .{ + 0x6918bd767e730778, + }); try testType(@Vector(2, u64), .{ 0x4cd89a317b03d430, 0x28998f61842f63a9, }, .{ From 73a42953c93273c178a1f8c8067bc732ca933be0 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 23 Dec 2024 01:56:39 -0500 Subject: [PATCH 06/25] x86_64: 2 means better --- src/arch/x86_64/CodeGen.zig | 2736 +++++++++++++++++++++++++-------- src/arch/x86_64/Encoding.zig | 3 + src/arch/x86_64/bits.zig | 25 + src/arch/x86_64/encodings.zig | 10 + test/behavior/x86_64/math.zig | 6 + 5 files changed, 2172 insertions(+), 608 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index dffcbf7b78..c8d41dfc4e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -135,7 +135,8 @@ const Owner = union(enum) { } }; -const MaskKind = enum { sign, all }; +const MaskKind = enum(u1) { sign, all }; +const MaskInfo = packed struct { kind: MaskKind, inverted: bool, scalar: Memory.Size }; pub const MCValue = union(enum) { /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. @@ -167,7 +168,7 @@ pub const MCValue = union(enum) { /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register. register_overflow: struct { reg: Register, eflags: Condition }, /// The value is a bool vector stored in a vector register with a different scalar type. - register_mask: struct { reg: Register, kind: MaskKind, inverted: bool, scalar: Memory.Size }, + register_mask: struct { reg: Register, info: MaskInfo }, /// The value is in memory at a hard-coded address. /// If the type is a pointer, it means the pointer address is stored at this memory location. 
memory: u64, @@ -509,11 +510,23 @@ pub const MCValue = union(enum) { .memory => |pl| try writer.print("[ds:0x{x}]", .{pl}), inline .eflags, .register => |pl| try writer.print("{s}", .{@tagName(pl)}), .register_pair => |pl| try writer.print("{s}:{s}", .{ @tagName(pl[1]), @tagName(pl[0]) }), - .register_triple => |pl| try writer.print("{s}:{s}:{s}", .{ @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]) }), - .register_quadruple => |pl| try writer.print("{s}:{s}:{s}:{s}", .{ @tagName(pl[3]), @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]) }), + .register_triple => |pl| try writer.print("{s}:{s}:{s}", .{ + @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]), + }), + .register_quadruple => |pl| try writer.print("{s}:{s}:{s}:{s}", .{ + @tagName(pl[3]), @tagName(pl[2]), @tagName(pl[1]), @tagName(pl[0]), + }), .register_offset => |pl| try writer.print("{s} + 0x{x}", .{ @tagName(pl.reg), pl.off }), - .register_overflow => |pl| try writer.print("{s}:{s}", .{ @tagName(pl.eflags), @tagName(pl.reg) }), - .register_mask => |pl| try writer.print("mask({s},{}):{s}", .{ @tagName(pl.kind), pl.scalar, @tagName(pl.reg) }), + .register_overflow => |pl| try writer.print("{s}:{s}", .{ + @tagName(pl.eflags), + @tagName(pl.reg), + }), + .register_mask => |pl| try writer.print("mask({s},{}):{c}{s}", .{ + @tagName(pl.info.kind), + pl.info.scalar, + @as(u8, if (pl.info.inverted) '!' else ' '), + @tagName(pl.reg), + }), .load_symbol => |pl| try writer.print("[sym:{} + 0x{x}]", .{ pl.sym_index, pl.off }), .lea_symbol => |pl| try writer.print("sym:{} + 0x{x}", .{ pl.sym_index, pl.off }), .indirect => |pl| try writer.print("[{s} + 0x{x}]", .{ @tagName(pl.reg), pl.off }), @@ -524,7 +537,9 @@ pub const MCValue = union(enum) { .load_tlv => |pl| try writer.print("[tlv:{d}]", .{pl}), .lea_tlv => |pl| try writer.print("tlv:{d}", .{pl}), .load_frame => |pl| try writer.print("[{} + 0x{x}]", .{ pl.index, pl.off }), - .elementwise_regs_then_frame => |pl| try writer.print("elementwise:{d}:[{} + 0x{x}]", .{ pl.regs, pl.frame_index, pl.frame_off }), + .elementwise_regs_then_frame => |pl| try writer.print("elementwise:{d}:[{} + 0x{x}]", .{ + pl.regs, pl.frame_index, pl.frame_off, + }), .lea_frame => |pl| try writer.print("{} + 0x{x}", .{ pl.index, pl.off }), .reserved_frame => |pl| try writer.print("(dead:{})", .{pl}), .air_ref => |pl| try writer.print("(air:0x{x})", .{@intFromEnum(pl)}), @@ -2390,13 +2405,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .div_float, .div_trunc, .div_floor, .div_exact => try cg.airMulDivBinOp(inst), - .cmp_lt => try cg.airCmp(inst, .lt), - .cmp_lte => try cg.airCmp(inst, .lte), - .cmp_eq => try cg.airCmp(inst, .eq), - .cmp_gte => try cg.airCmp(inst, .gte), - .cmp_gt => try cg.airCmp(inst, .gt), - .cmp_neq => try cg.airCmp(inst, .neq), - .cmp_lt_errors_len => try cg.airCmpLtErrorsLen(inst), .bitcast => try cg.airBitCast(inst), @@ -2474,12 +2482,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .rem_optimized, .mod_optimized, .neg_optimized, - .cmp_lt_optimized, - .cmp_lte_optimized, - .cmp_eq_optimized, - .cmp_gte_optimized, - .cmp_gt_optimized, - .cmp_neq_optimized, .reduce_optimized, .int_from_float_optimized, => return cg.fail("TODO implement optimized float mode", .{}), @@ -2512,148 +2514,313 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const bin_op = air_datas[@intFromEnum(inst)].bin_op; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - try cg.select(&res, 
&.{cg.typeOfIndex(inst)}, &ops, pattern_sets: switch (air_tag) { + cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) { else => unreachable, - inline .bit_and, .bit_or, .xor => |ct_air_tag| { - const mir_tag: Mir.Inst.Tag = switch (ct_air_tag) { - else => unreachable, - .bit_and => .@"and", - .bit_or => .@"or", - .xor => .xor, - }; - break :pattern_sets &.{ - .{ - .required_features = &.{.avx2}, - .mir_tag = .{ .vp_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .ymm, .ymm, .mem } }, - .{ .ops = &.{ .ymm, .mem, .ymm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm, .ymm, .ymm } }, - }, + .bit_and => .@"and", + .bit_or => .@"or", + .xor => .xor, + })) { + else => unreachable, + inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{ + .required_features = .{ .avx2, null }, + .patterns = &.{ + .{ .src = .{ .ymm, .mem } }, + .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .ymm, .ymm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ .vp_, mir_tag, .ydst0, .ysrc0, .ysrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .patterns = &.{ + .{ .src = .{ .ymm, .mem } }, + .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .ymm, .ymm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ .v_pd, mir_tag, .ydst0, .ysrc0, .ysrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .patterns = &.{ + .{ .src = .{ .xmm, .mem } }, + .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .xmm, .xmm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ .vp_, mir_tag, .xdst0, .xsrc0, .xsrc1, .none }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .patterns = &.{ + .{ .src = .{ .mut_xmm, .mem } }, + .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_xmm, .xmm } }, + }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = &.{ + .{ .p_, mir_tag, .xdst0, .xsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .sse, null }, + .patterns = &.{ + .{ .src = .{ .mut_xmm, .mem } }, + .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_xmm, .xmm } }, + }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = &.{ + .{ ._ps, mir_tag, .xdst0, .xsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .mmx, null }, + .patterns = &.{ + .{ .src = .{ .mut_mm, .mem } }, + .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mm, .mm } }, + }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = &.{ + .{ .p_, mir_tag, .rdst0, .rsrc1, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .imm8 } }, + .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .imm8 } }, + .{ .src = .{ .imm8, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .gpr } }, + .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .mem } }, + .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = &.{ + .{ ._, mir_tag, .dst0b, .src1b, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .imm16 } }, + .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .imm16 } }, + .{ .src = .{ .imm16, .mut_gpr }, .commute = .{ 0, 
1 } }, + .{ .src = .{ .mut_mem, .gpr } }, + .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .mem } }, + .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = &.{ + .{ ._, mir_tag, .dst0w, .src1w, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .imm32 } }, + .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .imm32 } }, + .{ .src = .{ .imm32, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .gpr } }, + .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .mem } }, + .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = &.{ + .{ ._, mir_tag, .edst0, .esrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .simm32 } }, + .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .simm32 } }, + .{ .src = .{ .simm32, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .gpr } }, + .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .mem } }, + .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = &.{ + .{ ._, mir_tag, .rdst0, .rsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb = .{ + .of = .ysrc0, + .body = &.{ + .{ .v_, .movdqu, .ytmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .vp_, mir_tag, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none }, + .{ .v_, .movdqu, .{ .dst_limb = 0 }, .ytmp1, .none, .none }, }, - .{ - .required_features = &.{.avx}, - .mir_tag = .{ .vp_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm, .xmm, .mem } }, - .{ .ops = &.{ .xmm, .mem, .xmm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm, .xmm, .xmm } }, - }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb = .{ + .of = .ysrc0, + .body = &.{ + .{ .v_pd, .movu, .ytmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .v_pd, mir_tag, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none }, + .{ .v_pd, .movu, .{ .dst_limb = 0 }, .ytmp1, .none, .none }, }, - .{ - .required_features = &.{.sse2}, - .mir_tag = .{ .p_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .xmm, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .xmm } }, - .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } }, - }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = 
.{.mem}, + .each = .{ .limb = .{ + .of = .xsrc0, + .body = &.{ + .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .vp_, mir_tag, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none }, + .{ .v_, .movdqu, .{ .dst_limb = 0 }, .xtmp1, .none, .none }, }, - .{ - .required_features = &.{.sse}, - .mir_tag = .{ ._ps, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .xmm, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm, .{ .implicit = 0 }, .xmm } }, - .{ .ops = &.{ .xmm, .xmm, .{ .implicit = 0 } } }, - }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb = .{ + .of = .xsrc0, + .body = &.{ + .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .p_, mir_tag, .xtmp1, .{ .src_limb = 1 }, .none, .none }, + .{ ._, .movdqu, .{ .dst_limb = 0 }, .xtmp1, .none, .none }, }, - .{ - .required_features = &.{.mmx}, - .mir_tag = .{ .p_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .mm, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm, .{ .implicit = 0 }, .mm } }, - .{ .ops = &.{ .mm, .mm, .{ .implicit = 0 } } }, - }, + } }, + }, .{ + .required_features = .{ .sse, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb = .{ + .of = .xsrc0, + .body = &.{ + .{ ._ps, .movu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ ._ps, mir_tag, .xtmp1, .{ .src_limb = 1 }, .none, .none }, + .{ ._ps, .movu, .{ .dst_limb = 0 }, .xtmp1, .none, .none }, }, - .{ - .clobbers = .{ .eflags = true }, - .mir_tag = .{ ._, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } }, - .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } }, - .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } }, - .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } }, - .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } }, - }, + } }, + }, .{ + .required_features = .{ .mmx, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb = .{ + .of = .rsrc0, + .body = &.{ + .{ ._q, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .p_, mir_tag, .rtmp1, .{ .src_limb = 1 }, .none, .none }, + .{ ._q, .mov, .{ .dst_limb = 0 }, .rtmp1, .none, .none }, }, - - .{ - .required_features = &.{.avx2}, - .loop = .bitwise, - .mir_tag = .{ .vp_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .ymm_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_limb, .ymm_limb, .ymm_limb } }, - }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ 
.rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb = .{ + .of = .rsrc0, + .body = &.{ + .{ ._, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ ._, mir_tag, .rtmp1, .{ .src_limb = 1 }, .none, .none }, + .{ ._, .mov, .{ .dst_limb = 0 }, .rtmp1, .none, .none }, }, - .{ - .required_features = &.{.avx}, - .loop = .bitwise, - .mir_tag = .{ .vp_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_limb, .xmm_limb, .xmm_limb } }, - }, + } }, + }, .{ + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb = .{ + .of = .esrc0, + .body = &.{ + .{ ._, .mov, .etmp1, .{ .src_limb = 0 }, .none, .none }, + .{ ._, mir_tag, .etmp1, .{ .src_limb = 1 }, .none, .none }, + .{ ._, .mov, .{ .dst_limb = 0 }, .etmp1, .none, .none }, }, - .{ - .required_features = &.{.sse2}, - .loop = .bitwise, - .mir_tag = .{ .p_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse}, - .loop = .bitwise, - .mir_tag = .{ ._ps, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .loop = .bitwise, - .mir_tag = .{ .p_, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .mm_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .mm_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_limb, .{ .implicit = 0 }, .mm_limb } }, - }, - }, - .{ - .clobbers = .{ .eflags = true }, - .loop = .bitwise, - .mir_tag = .{ ._, mir_tag }, - .patterns = &.{ - .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } }, - .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, - }, - }, - }; - }, - }, .{}); + } }, + } }, + }) catch |err2| switch (err2) { + error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{ + @tagName(air_tag), + cg.typeOf(bin_op.lhs).fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), + else => |e| return e, + }; if (ops[0].index != res[0].index) try ops[0].die(cg); if (ops[1].index != res[0].index) try ops[1].die(cg); try res[0].moveTo(inst, cg); @@ -2720,422 +2887,660 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { switch (extra.compareOperator()) { .lt => unreachable, .lte => unreachable, - .eq, .neq => |cmp_op| try cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, &.{ - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .byte }, - .mir_tag = .{ .vp_b, .cmpeq }, + .eq, .neq => |cmp_op| cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (cmp_op) { + else => unreachable, + .eq => .e, + .neq => .ne, + })) { + else => unreachable, + inline .e, .ne => 
|cc| comptime &.{ .{ + .required_features = .{ .avx2, null }, + .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ - .{ .ops = &.{ .ymm_mask, .ymm, .mem } }, - .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask, .ymm, .ymm } }, + .{ .src = .{ .ymm, .mem } }, + .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .ymm, .ymm } }, }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .word }, - .mir_tag = .{ .vp_w, .cmpeq }, + .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .byte, + } } }}, + .each = .{ .once = &.{ + .{ .vp_b, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ - .{ .ops = &.{ .ymm_mask, .ymm, .mem } }, - .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask, .ymm, .ymm } }, + .{ .src = .{ .ymm, .mem } }, + .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .ymm, .ymm } }, }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .dword }, - .mir_tag = .{ .vp_d, .cmpeq }, + .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .word, + } } }}, + .each = .{ .once = &.{ + .{ .vp_w, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ - .{ .ops = &.{ .ymm_mask, .ymm, .mem } }, - .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask, .ymm, .ymm } }, + .{ .src = .{ .ymm, .mem } }, + .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .ymm, .ymm } }, }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .qword }, - .mir_tag = .{ .vp_q, .cmpeq }, + .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .dword, + } } }}, + .each = .{ .once = &.{ + .{ .vp_d, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ - .{ .ops = &.{ .ymm_mask, .ymm, .mem } }, - .{ .ops = &.{ .ymm_mask, .mem, .ymm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask, .ymm, .ymm } }, + .{ .src = .{ .ymm, .mem } }, + .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .ymm, .ymm } }, }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .byte }, - .mir_tag = .{ .vp_b, .cmpeq }, + .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .qword, + } } }}, + .each = .{ .once = &.{ + .{ .vp_q, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ - .{ .ops = &.{ .xmm_mask, .xmm, .mem } }, - .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask, .xmm, .xmm } }, + .{ .src = .{ .xmm, .mem } }, + .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .xmm, .xmm } }, }, - }, - .{ - 
.required_features = &.{.avx}, - .scalar = .{ .any_int = .word }, - .mir_tag = .{ .vp_w, .cmpeq }, + .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .byte, + } } }}, + .each = .{ .once = &.{ + .{ .vp_b, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ - .{ .ops = &.{ .xmm_mask, .xmm, .mem } }, - .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask, .xmm, .xmm } }, + .{ .src = .{ .xmm, .mem } }, + .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .xmm, .xmm } }, }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .dword }, - .mir_tag = .{ .vp_d, .cmpeq }, + .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .word, + } } }}, + .each = .{ .once = &.{ + .{ .vp_w, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ - .{ .ops = &.{ .xmm_mask, .xmm, .mem } }, - .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask, .xmm, .xmm } }, + .{ .src = .{ .xmm, .mem } }, + .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .xmm, .xmm } }, }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .qword }, - .mir_tag = .{ .vp_q, .cmpeq }, + .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .dword, + } } }}, + .each = .{ .once = &.{ + .{ .vp_d, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ - .{ .ops = &.{ .xmm_mask, .xmm, .mem } }, - .{ .ops = &.{ .xmm_mask, .mem, .xmm }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask, .xmm, .xmm } }, + .{ .src = .{ .xmm, .mem } }, + .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .xmm, .xmm } }, }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .byte }, - .mir_tag = .{ .p_b, .cmpeq }, + .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .qword, + } } }}, + .each = .{ .once = &.{ + .{ .vp_q, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ - .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } }, - .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } }, + .{ .src = .{ .mut_xmm, .mem } }, + .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_xmm, .xmm } }, }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .word }, - .mir_tag = .{ .p_w, .cmpeq }, + .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .byte, + } } }}, + .each = .{ .once = &.{ + .{ .p_b, .cmpeq, .xdst0, .xsrc1, 
.none, .none }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ - .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } }, - .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } }, + .{ .src = .{ .mut_xmm, .mem } }, + .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_xmm, .xmm } }, }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .dword }, - .mir_tag = .{ .p_d, .cmpeq }, + .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .word, + } } }}, + .each = .{ .once = &.{ + .{ .p_w, .cmpeq, .xdst0, .xsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ - .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } }, - .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } }, + .{ .src = .{ .mut_xmm, .mem } }, + .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_xmm, .xmm } }, }, - }, - .{ - .required_features = &.{.sse4_1}, - .scalar = .{ .any_int = .qword }, - .mir_tag = .{ .p_q, .cmpeq }, + .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .dword, + } } }}, + .each = .{ .once = &.{ + .{ .p_d, .cmpeq, .xdst0, .xsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .sse4_1, null }, + .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ - .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .xmm_mask, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask, .{ .implicit = 0 }, .xmm } }, - .{ .ops = &.{ .xmm_mask, .xmm, .{ .implicit = 0 } } }, + .{ .src = .{ .mut_xmm, .mem } }, + .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_xmm, .xmm } }, }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .byte }, - .mir_tag = .{ .p_b, .cmpeq }, + .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .qword, + } } }}, + .each = .{ .once = &.{ + .{ .p_q, .cmpeq, .xdst0, .xsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .mmx, null }, + .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ - .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .mm_mask, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } }, - .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } }, + .{ .src = .{ .mut_mm, .mem } }, + .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mm, .mm } }, }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .word }, - .mir_tag = .{ .p_w, .cmpeq }, + .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .byte, + } } }}, + .each = .{ .once = &.{ + .{ .p_b, .cmpeq, .rdst0, .rsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .mmx, null }, + .constraints = .{ .{ .int 
= .word }, .{ .int = .word } }, .patterns = &.{ - .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .mm_mask, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } }, - .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } }, + .{ .src = .{ .mut_mm, .mem } }, + .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mm, .mm } }, }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .dword }, - .mir_tag = .{ .p_d, .cmpeq }, + .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .word, + } } }}, + .each = .{ .once = &.{ + .{ .p_w, .cmpeq, .rdst0, .rsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .mmx, null }, + .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ - .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .mm_mask, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask, .{ .implicit = 0 }, .mm } }, - .{ .ops = &.{ .mm_mask, .mm, .{ .implicit = 0 } } }, + .{ .src = .{ .mut_mm, .mem } }, + .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mm, .mm } }, + }, + .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .kind = .all, + .inverted = switch (cc) { + else => unreachable, + .e => false, + .ne => true, + }, + .scalar = .dword, + } } }}, + .each = .{ .once = &.{ + .{ .p_d, .cmpeq, .rdst0, .rsrc1, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .bool_vec = .byte }, .{ .bool_vec = .byte } }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .imm8 } }, + .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .imm8 } }, + .{ .src = .{ .imm8, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .gpr } }, + .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .mem } }, + .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .gpr } }, }, - }, - .{ - .scalar = .bool, .clobbers = .{ .eflags = true }, - .invert_result = true, - .mir_tag = .{ ._, .xor }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .xor, .dst0b, .src1b, .none, .none }, + .{ ._, .not, .dst0b, .none, .none, .none }, + }, + .ne => &.{ + .{ ._, .xor, .dst0b, .src1b, .none, .none }, + }, + } }, + }, .{ + .constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } }, .patterns = &.{ - .{ .ops = &.{ .mem, .{ .implicit = 0 }, .simm32 } }, - .{ .ops = &.{ .mem, .simm32, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mem, .{ .implicit = 0 }, .gpr } }, - .{ .ops = &.{ .mem, .gpr, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .simm32 } }, - .{ .ops = &.{ .gpr, .simm32, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .mem } }, - .{ .ops = &.{ .gpr, .mem, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr, .{ .implicit = 0 }, .gpr } }, - .{ .ops = &.{ .gpr, .gpr, .{ .implicit = 0 } } }, + .{ .src = .{ .mut_mem, .imm16 } }, + .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .imm16 } }, + .{ .src = .{ .imm16, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .gpr } }, + .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .mem } }, + .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .gpr } }, }, - }, - - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .byte }, - .loop = 
.elementwise, - .mir_tag = .{ .vp_b, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, - }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .word }, - .loop = .elementwise, - .mir_tag = .{ .vp_w, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, - }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .dword }, - .loop = .elementwise, - .mir_tag = .{ .vp_d, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, - }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .qword }, - .loop = .elementwise, - .mir_tag = .{ .vp_q, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, - }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .byte }, - .loop = .elementwise, - .mir_tag = .{ .vp_b, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .word }, - .loop = .elementwise, - .mir_tag = .{ .vp_w, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .dword }, - .loop = .elementwise, - .mir_tag = .{ .vp_d, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .qword }, - .loop = .elementwise, - .mir_tag = .{ .vp_q, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .byte }, - .loop = .elementwise, - .mir_tag = .{ .p_b, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ 
.implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .word }, - .loop = .elementwise, - .mir_tag = .{ .p_w, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .dword }, - .loop = .elementwise, - .mir_tag = .{ .p_d, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse4_1}, - .scalar = .{ .any_int = .qword }, - .loop = .elementwise, - .mir_tag = .{ .p_q, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .byte }, - .loop = .elementwise, - .mir_tag = .{ .p_b, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .word }, - .loop = .elementwise, - .mir_tag = .{ .p_w, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .dword }, - .loop = .elementwise, - .mir_tag = .{ .p_d, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, - }, - }, - .{ - .scalar = .bool, .clobbers = .{ .eflags = true }, - .invert_result = true, - .loop = .elementwise, - .mir_tag = .{ ._, .xor }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .xor, .dst0w, .src1w, .none, .none }, + .{ ._, .not, .dst0w, .none, .none, .none }, + }, + .ne => &.{ + .{ ._, .xor, .dst0w, .src1w, .none, .none }, + }, + } }, + }, .{ + .constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } }, .patterns = &.{ - .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } }, - .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, + .{ .src = .{ .mut_mem, .imm32 } }, + .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .imm32 } }, + .{ .src = .{ .imm32, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .gpr } }, + .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .mem } }, + .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .gpr } }, }, - }, - 
.{ - .scalar = .{ .any_int = .byte }, .clobbers = .{ .eflags = true }, - .loop = .elementwise, - .mir_tag = .{ ._, .cmp }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .xor, .edst0, .esrc1, .none, .none }, + .{ ._, .not, .edst0, .none, .none, .none }, + }, + .ne => &.{ + .{ ._, .xor, .edst0, .esrc1, .none, .none }, + }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } }, .patterns = &.{ - .{ .ops = &.{ .cc_mask_limb, .mem_limb, .gpr_limb } }, - .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .mem_limb } }, - .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .gpr_limb } }, + .{ .src = .{ .mut_mem, .simm32 } }, + .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .simm32 } }, + .{ .src = .{ .simm32, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .gpr } }, + .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .mem } }, + .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_gpr, .gpr } }, }, - }, - .{ - .scalar = .{ .any_int = .word }, .clobbers = .{ .eflags = true }, - .loop = .elementwise, - .mir_tag = .{ ._, .cmp }, - .patterns = &.{ - .{ .ops = &.{ .cc_mask_limb, .mem_limb, .gpr_limb } }, - .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .mem_limb } }, - .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .gpr_limb } }, + .dst_temps = .{.{ .src = 0 }}, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .xor, .rdst0, .rsrc1, .none, .none }, + .{ ._, .not, .rdst0, .none, .none, .none }, + }, + .ne => &.{ + .{ ._, .xor, .rdst0, .rsrc1, .none, .none }, + }, + } }, + } }, + }) catch |err2| switch (err2) { + error.Select2Failed => cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, &.{ + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .byte }, + .loop = .elementwise, + .mir_tag = .{ .vp_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, + }, }, - }, - .{ - .scalar = .{ .any_int = .dword }, - .clobbers = .{ .eflags = true }, - .loop = .elementwise, - .mir_tag = .{ ._, .cmp }, - .patterns = &.{ - .{ .ops = &.{ .cc_mask_limb, .mem_limb, .gpr_limb } }, - .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .mem_limb } }, - .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .gpr_limb } }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .word }, + .loop = .elementwise, + .mir_tag = .{ .vp_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, + }, }, - }, - .{ - .scalar = .{ .any_int = .qword }, - .clobbers = .{ .eflags = true }, - .loop = .elementwise, - .mir_tag = .{ ._, .cmp }, - .patterns = &.{ - .{ .ops = &.{ .cc_mask_limb, .mem_limb, .gpr_limb } }, - .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .mem_limb } }, - .{ .ops = &.{ .cc_mask_limb, .gpr_limb, .gpr_limb } }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .dword }, + .loop = .elementwise, + .mir_tag = .{ .vp_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, 
+ .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, + }, }, + .{ + .required_features = &.{.avx2}, + .scalar = .{ .any_int = .qword }, + .loop = .elementwise, + .mir_tag = .{ .vp_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, + .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .byte }, + .loop = .elementwise, + .mir_tag = .{ .vp_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .word }, + .loop = .elementwise, + .mir_tag = .{ .vp_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .dword }, + .loop = .elementwise, + .mir_tag = .{ .vp_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.avx}, + .scalar = .{ .any_int = .qword }, + .loop = .elementwise, + .mir_tag = .{ .vp_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .byte }, + .loop = .elementwise, + .mir_tag = .{ .p_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .word }, + .loop = .elementwise, + .mir_tag = .{ .p_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse2}, + .scalar = .{ .any_int = .dword }, + .loop = .elementwise, + .mir_tag = .{ .p_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.sse4_1}, + .scalar = .{ .any_int = .qword }, + .loop = .elementwise, + 
.mir_tag = .{ .p_q, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .byte }, + .loop = .elementwise, + .mir_tag = .{ .p_b, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .word }, + .loop = .elementwise, + .mir_tag = .{ .p_w, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, + }, + }, + .{ + .required_features = &.{.mmx}, + .scalar = .{ .any_int = .dword }, + .loop = .elementwise, + .mir_tag = .{ .p_d, .cmpeq }, + .patterns = &.{ + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, + }, + }, + .{ + .scalar = .bool, + .clobbers = .{ .eflags = true }, + .invert_result = true, + .loop = .elementwise, + .mir_tag = .{ ._, .xor }, + .patterns = &.{ + .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } }, + .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } }, + .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } }, + .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, + }, + }, + .{ + .scalar = .{ .any_int = .byte }, + .clobbers = .{ .eflags = true }, + .loop = .elementwise, + .mir_tag = .{ ._, .cmp }, + .patterns = &.{ + .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } }, + .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } }, + .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } }, + }, + }, + .{ + .scalar = .{ .any_int = .word }, + .clobbers = .{ .eflags = true }, + .loop = .elementwise, + .mir_tag = .{ ._, .cmp }, + .patterns = &.{ + .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } }, + .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } }, + .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } }, + }, + }, + .{ + .scalar = .{ .any_int = .dword }, + .clobbers = .{ .eflags = true }, + .loop = .elementwise, + .mir_tag = .{ ._, .cmp }, + .patterns = &.{ + .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } }, + .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } }, + .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } }, + }, + }, + .{ + .scalar = .{ .any_int = .qword }, + .clobbers = .{ .eflags = true }, + .loop = .elementwise, + .mir_tag = .{ ._, .cmp }, + .patterns = &.{ + .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } }, + .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } }, + .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } }, + }, + }, + }, .{ + .cc = .e, + .invert_result = switch (cmp_op) { + .eq => false, + .neq => true, + else => unreachable, + }, + }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select", .{}), + else => |e| return e, }, - }, .{ - .cc = .e, - .invert_result = switch (cmp_op) { - .eq => false, - .neq => true, - else => unreachable, - }, - }), + else => |e| return e, + }, .gte => unreachable, .gt => unreachable, } @@ -3144,6 +3549,643 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { try res[0].moveTo(inst, cg); }, + .cmp_lt, + .cmp_lt_optimized, + .cmp_lte, + .cmp_lte_optimized, + .cmp_gte, + .cmp_gte_optimized, + .cmp_gt, + .cmp_gt_optimized, + => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) { + else => unreachable, + .cmp_lt, .cmp_lt_optimized => .lt, + .cmp_lte, .cmp_lte_optimized => .lte, + .cmp_gte, .cmp_gte_optimized => .gte, + .cmp_gt, .cmp_gt_optimized => .gt, + }) else { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu); + const signedness = if (scalar_ty.isAbiInt(zcu)) + scalar_ty.intInfo(zcu).signedness + else + .unsigned; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + var res: [1]Temp = undefined; + cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (signedness) { + .signed => switch (air_tag) { + else => unreachable, + .cmp_lt, .cmp_lt_optimized => .l, + .cmp_lte, .cmp_lte_optimized => .le, + .cmp_gte, .cmp_gte_optimized => .ge, + .cmp_gt, .cmp_gt_optimized => .g, + }, + .unsigned => switch (air_tag) { + else => unreachable, + .cmp_lt, .cmp_lt_optimized => .b, + .cmp_lte, .cmp_lte_optimized => .be, + .cmp_gte, .cmp_gte_optimized => .ae, + .cmp_gt, .cmp_gt_optimized => .a, + }, + })) { + else => unreachable, + inline .l, .le, .ge, .g, .b, .be, .ae, .a => |cc| comptime &.{ .{ + .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc.commute() }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .src0b, .src1b, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .mem, .imm8 } }, + .{ .src = .{ .gpr, .imm8 } }, + .{ .src = .{ .gpr, .mem } }, + .{ .src = .{ .gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .src0b, .src1b, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc.commute() }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .src0w, .src1w, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .mem, .imm16 } }, + .{ .src = .{ .gpr, .imm16 } }, + .{ .src = .{ .gpr, .mem } }, + .{ .src = .{ .gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .src0w, .src1w, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc.commute() }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .esrc0, .esrc1, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .mem, .imm32 } }, + .{ .src = .{ .gpr, .imm32 } }, + .{ .src = .{ .gpr, .mem } }, + .{ .src = .{ 
.gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .esrc0, .esrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc.commute() }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .mem, .simm32 } }, + .{ .src = .{ .gpr, .simm32 } }, + .{ .src = .{ .gpr, .mem } }, + .{ .src = .{ .gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .bool, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .limb = .{ + .of = .rsrc0, + .header = &.{ + .{ ._, .xor, .tmp1b, .tmp1b, .none, .none }, + }, + .body = &.{ + .{ ._, .mov, .rtmp0, .{ .src_limb = 0 }, .none, .none }, + .{ ._r, .sh, .tmp1b, .{ .simm32 = 1 }, .none, .none }, + .{ ._, .sbb, .rtmp0, .{ .src_limb = 1 }, .none, .none }, + .{ ._c, .set, .tmp1b, .none, .none, .none }, + .{ .fromCondition(cc), .set, .dst0b, .none, .none, .none }, + }, + } }, + }, .{ + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .bool, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .limb = .{ + .of = .esrc0, + .header = &.{ + .{ ._, .xor, .tmp1b, .tmp1b, .none, .none }, + }, + .body = &.{ + .{ ._, .mov, .etmp0, .{ .src_limb = 0 }, .none, .none }, + .{ ._r, .sh, .tmp1b, .{ .simm32 = 1 }, .none, .none }, + .{ ._, .sbb, .etmp0, .{ .src_limb = 1 }, .none, .none }, + .{ ._c, .set, .tmp1b, .none, .none, .none }, + .{ .fromCondition(cc), .set, .dst0b, .none, .none, .none }, + }, + } }, + } }, + }) catch |err| switch (err) { + error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{ + @tagName(air_tag), + cg.typeOf(bin_op.lhs).fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), + else => |e| return e, + }; + if (ops[0].index != res[0].index) try ops[0].die(cg); + if (ops[1].index != res[0].index) try ops[1].die(cg); + try res[0].moveTo(inst, cg); + }, + .cmp_eq, .cmp_eq_optimized, .cmp_neq, .cmp_neq_optimized => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) { + else => unreachable, + .cmp_eq, .cmp_eq_optimized => .eq, + .cmp_neq, .cmp_neq_optimized => .neq, + }) else fallback: { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + if (ip.isOptionalType(cg.typeOf(bin_op.lhs).toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) { + else => unreachable, + .cmp_eq, .cmp_eq_optimized => .eq, + .cmp_neq, .cmp_neq_optimized => .neq, + }); + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + var res: [1]Temp = undefined; 
+ cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (air_tag) { + else => unreachable, + .cmp_eq, .cmp_eq_optimized => .e, + .cmp_neq, .cmp_neq_optimized => .ne, + })) { + else => unreachable, + inline .e, .ne => |cc| comptime &.{ .{ + .required_features = .{ .avx2, null }, + .constraints = .{ .any_int, .any_int }, + .patterns = &.{ + .{ .src = .{ .ymm, .mem } }, + .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .ymm, .ymm } }, + }, + .clobbers = .{ .eflags = true }, + .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ .vp_, .xor, .ytmp0, .ysrc0, .ysrc1, .none }, + .{ .vp_, .@"test", .ytmp0, .ytmp0, .none, .none }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .constraints = .{ .any_int, .any_int }, + .patterns = &.{ + .{ .src = .{ .ymm, .mem } }, + .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .ymm, .ymm } }, + }, + .clobbers = .{ .eflags = true }, + .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ .v_pd, .xor, .ytmp0, .ysrc0, .ysrc1, .none }, + .{ .vp_, .@"test", .ytmp0, .ytmp0, .none, .none }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .constraints = .{ .any_int, .any_int }, + .patterns = &.{ + .{ .src = .{ .xmm, .mem } }, + .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .xmm, .xmm } }, + }, + .clobbers = .{ .eflags = true }, + .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ .vp_, .xor, .xtmp0, .xsrc0, .xsrc1, .none }, + .{ .vp_, .@"test", .xtmp0, .xtmp0, .none, .none }, + } }, + }, .{ + .required_features = .{ .sse4_1, null }, + .constraints = .{ .any_int, .any_int }, + .patterns = &.{ + .{ .src = .{ .mut_xmm, .mem } }, + .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_xmm, .xmm } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ .p_, .xor, .xsrc0, .xsrc1, .none, .none }, + .{ .p_, .@"test", .xsrc0, .xsrc0, .none, .none }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .constraints = .{ .any_int, .any_int }, + .patterns = &.{ + .{ .src = .{ .mut_xmm, .mem } }, + .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_xmm, .xmm } }, + }, + .clobbers = .{ .eflags = true }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ .p_, .xor, .xtmp1, .xtmp1, .none, .none }, + .{ .p_, .xor, .xsrc0, .xsrc1, .none, .none }, + .{ .p_b, .cmpeq, .xtmp1, .xsrc0, .none, .none }, + .{ .p_b, .movmsk, .etmp0, .xtmp1, .none, .none }, + .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u16) }, .none, .none }, + } }, + }, .{ + .required_features = .{ .sse2, .mmx }, + .constraints = .{ .any_int, .any_int }, + .patterns = &.{ + .{ .src = .{ .mut_mm, .mem } }, + .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mm, .mm } }, + }, + .clobbers = .{ .eflags = true }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ .p_, .xor, .rtmp1, .rtmp1, .none, .none }, + .{ .p_, .xor, .rsrc0, .rsrc1, .none, .none }, + .{ .p_b, .cmpeq, .rtmp1, .rsrc0, .none, .none }, + .{ 
.p_b, .movmsk, .etmp0, .rtmp1, .none, .none }, + .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u8) }, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .mem, .imm8 } }, + .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .imm8 } }, + .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .mem } }, + .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .src0b, .src1b, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .mem, .imm16 } }, + .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .imm16 } }, + .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .mem } }, + .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .src0w, .src1w, .none, .none }, + } }, + }, .{ + .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .mem, .imm32 } }, + .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .imm32 } }, + .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .mem } }, + .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .esrc0, .esrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .mem, .simm32 } }, + .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .simm32 } }, + .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .mem } }, + .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .gpr, .gpr } }, + }, + .clobbers = .{ .eflags = true }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .once = &.{ + .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .ysrc0, + .header = &.{ + .{ .vp_, .xor, .ytmp2, .ytmp2, .ytmp2, .none }, + }, + .body = &.{ + .{ .v_, .movdqu, .ytmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .vp_, .xor, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none }, + .{ .vp_, .@"or", .ytmp2, .ytmp2, .ytmp1, .none }, + }, + .trailer = &.{ + .{ .vp_, .@"test", .ytmp2, .ytmp2, .none, .none }, + }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .ysrc0, + .header = &.{ + .{ .v_pd, .xor, .ytmp2, .ytmp2, .ytmp2, .none }, + }, + .body = &.{ + .{ .v_pd, .movu, .ytmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .v_pd, .xor, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none }, + .{ .v_pd, .@"or", .ytmp2, .ytmp2, 
.ytmp1, .none }, + }, + .trailer = &.{ + .{ .vp_, .@"test", .ytmp2, .ytmp2, .none, .none }, + }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .xsrc0, + .header = &.{ + .{ .vp_, .xor, .xtmp2, .xtmp2, .xtmp2, .none }, + }, + .body = &.{ + .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .vp_, .xor, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none }, + .{ .vp_, .@"or", .xtmp2, .xtmp2, .xtmp1, .none }, + }, + .trailer = &.{ + .{ .vp_, .@"test", .xtmp2, .xtmp2, .none, .none }, + }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .xsrc0, + .header = &.{ + .{ .vp_, .xor, .xtmp2, .xtmp2, .xtmp2, .none }, + }, + .body = &.{ + .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .vp_, .xor, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none }, + .{ .vp_, .@"or", .xtmp2, .xtmp2, .xtmp1, .none }, + }, + .trailer = &.{ + .{ .vp_, .@"test", .xtmp2, .xtmp2, .none, .none }, + }, + } }, + }, .{ + .required_features = .{ .sse4_1, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .xsrc0, + .header = &.{ + .{ .p_, .xor, .xtmp2, .xtmp2, .none, .none }, + }, + .body = &.{ + .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .p_, .xor, .xtmp1, .{ .src_limb = 1 }, .none, .none }, + .{ .p_, .@"or", .xtmp2, .xtmp1, .none, .none }, + }, + .trailer = &.{ + .{ .p_, .@"test", .xtmp2, .xtmp2, .none, .none }, + }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .xsrc0, + .header = &.{ + .{ .p_, .xor, .xtmp2, .xtmp2, .none, .none }, + }, + .body = &.{ + .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .p_, .xor, .xtmp1, .{ .src_limb = 1 }, .none, .none }, + .{ .p_, .@"or", .xtmp2, .xtmp1, .none, .none }, + }, + .trailer = &.{ + .{ .p_, .xor, .xtmp1, .xtmp1, .none, .none }, + .{ .p_b, .cmpeq, .xtmp2, .xtmp1, .none, .none }, + .{ .p_b, .movmsk, .etmp0, .xtmp2, .none, .none }, + .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u16) }, .none, .none }, + }, + } }, + }, .{ + .required_features = .{ .sse, .mmx }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .{ .kind = .{ .rc = .mmx } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .rsrc0, + .header = &.{ + .{ .p_, .xor, .rtmp2, .rtmp2, .none, .none }, + }, + .body = &.{ + .{ ._q, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ .p_, .xor, .rtmp1, .{ .src_limb = 1 }, .none, .none }, + .{ .p_, .@"or", .rtmp2, .rtmp1, 
.none, .none }, + }, + .trailer = &.{ + .{ .p_, .xor, .rtmp1, .rtmp1, .none, .none }, + .{ .p_b, .cmpeq, .rtmp2, .rtmp1, .none, .none }, + .{ .p_b, .movmsk, .etmp0, .rtmp2, .none, .none }, + .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u8) }, .none, .none }, + }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .rsrc0, + .header = &.{ + .{ ._, .xor, .rtmp2, .rtmp2, .none, .none }, + }, + .body = &.{ + .{ ._, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none }, + .{ ._, .xor, .rtmp1, .{ .src_limb = 1 }, .none, .none }, + .{ ._, .@"or", .rtmp2, .rtmp1, .none, .none }, + }, + .trailer = &.{ + .{ ._, .@"test", .rtmp2, .rtmp2, .none, .none }, + }, + } }, + }, .{ + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .each = .{ .limb = .{ + .of = .esrc0, + .header = &.{ + .{ ._, .xor, .etmp2, .etmp2, .none, .none }, + }, + .body = &.{ + .{ ._, .mov, .etmp1, .{ .src_limb = 0 }, .none, .none }, + .{ ._, .xor, .etmp1, .{ .src_limb = 1 }, .none, .none }, + .{ ._, .@"or", .etmp2, .etmp1, .none, .none }, + }, + .trailer = &.{ + .{ ._, .@"test", .etmp2, .etmp2, .none, .none }, + }, + } }, + } }, + }) catch |err| switch (err) { + error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{ + @tagName(air_tag), + cg.typeOf(bin_op.lhs).fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), + else => |e| return e, + }; + if (ops[0].index != res[0].index) try ops[0].die(cg); + if (ops[1].index != res[0].index) try ops[1].die(cg); + try res[0].moveTo(inst, cg); + }, + .cond_br => try cg.airCondBr(inst), .switch_br => try cg.airSwitchBr(inst), .loop_switch_br => try cg.airLoopSwitchBr(inst), @@ -13850,7 +14892,6 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v .undef, .immediate, .eflags, - .register, .register_offset, .register_overflow, .register_mask, @@ -13864,7 +14905,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v .reserved_frame, .air_ref, => unreachable, - .register_pair, .register_triple, .register_quadruple, .load_frame => null, + .register, .register_pair, .register_triple, .register_quadruple, .load_frame => null, .memory, .load_symbol, .load_got, .load_direct, .load_tlv => dst: { switch (resolved_dst_mcv) { .memory => |addr| if (std.math.cast( @@ -13893,8 +14934,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v }; }, }; - defer if (dst_info) |info| - self.register_manager.unlockReg(info.addr_lock); + defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock); const resolved_src_mcv = switch (src_mcv) { else => src_mcv, @@ -16299,15 +17339,15 @@ fn genSetReg( const bits_lock = self.register_manager.lockReg(bits_reg); defer if (bits_lock) |lock| self.register_manager.unlockReg(lock); - const pack_reg = switch (src_reg_mask.scalar) { + const pack_reg = switch (src_reg_mask.info.scalar) { else => src_reg_mask.reg, .word => try self.register_manager.allocReg(null, abi.RegisterClass.sse), }; 
const pack_lock = self.register_manager.lockReg(pack_reg); defer if (pack_lock) |lock| self.register_manager.unlockReg(lock); - var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.scalar.bitSize(), 8)); - switch (src_reg_mask.scalar) { + var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.info.scalar.bitSize(), 8)); + switch (src_reg_mask.info.scalar) { else => {}, .word => { const src_alias = registerAlias(src_reg_mask.reg, mask_size); @@ -16321,13 +17361,13 @@ fn genSetReg( mask_size = std.math.divCeil(u32, mask_size, 2) catch unreachable; }, } - try self.asmRegisterRegister(.{ switch (src_reg_mask.scalar) { + try self.asmRegisterRegister(.{ switch (src_reg_mask.info.scalar) { .byte, .word => if (has_avx) .vp_b else .p_b, .dword => if (has_avx) .v_ps else ._ps, .qword => if (has_avx) .v_pd else ._pd, else => unreachable, }, .movmsk }, bits_reg.to32(), registerAlias(pack_reg, mask_size)); - if (src_reg_mask.inverted) try self.asmRegister(.{ ._, .not }, registerAlias(bits_reg, abi_size)); + if (src_reg_mask.info.inverted) try self.asmRegister(.{ ._, .not }, registerAlias(bits_reg, abi_size)); try self.genSetReg(dst_reg, ty, .{ .register = bits_reg }, .{}); }, .memory, .load_symbol, .load_direct, .load_got, .load_tlv => { @@ -21239,18 +22279,14 @@ const Temp = struct { return true; } - fn toRegClass(temp: *Temp, rc: Register.Class, cg: *CodeGen) !bool { - const val, const ty = switch (temp.unwrap(cg)) { - .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) }, - .temp => |temp_index| val: { - const temp_tracking = temp_index.tracking(cg); - switch (temp_tracking.short) { - else => {}, - .register => |reg| if (reg.class() == rc) return false, - } - break :val .{ temp_tracking.short, temp_index.typeOf(cg) }; - }, + fn toRegClass(temp: *Temp, mut: bool, rc: Register.Class, cg: *CodeGen) !bool { + const val = temp.tracking(cg).short; + if (!mut or temp.isMut(cg)) switch (val) { + else => {}, + .register => |reg| if (reg.class() == rc) return false, + .register_offset => |reg_off| if (reg_off.reg.class() == rc and reg_off.off == 0) return false, }; + const ty = temp.typeOf(cg); const new_temp_index = cg.next_temp_index; cg.temp_type[@intFromEnum(new_temp_index)] = ty; const new_reg = try cg.register_manager.allocReg(new_temp_index.toIndex(), regSetForRegClass(rc)); @@ -21281,15 +22317,11 @@ const Temp = struct { first_temp.* = result_temp; } - fn asMask(temp: Temp, kind: MaskKind, inverted: bool, scalar: Memory.Size, cg: *CodeGen) void { - assert(scalar != .none); + fn asMask(temp: Temp, info: MaskInfo, cg: *CodeGen) void { + assert(info.scalar != .none); const mcv = &temp.unwrap(cg).temp.tracking(cg).short; - mcv.* = .{ .register_mask = .{ - .reg = mcv.register, - .kind = kind, - .inverted = inverted, - .scalar = scalar, - } }; + const reg = mcv.register; + mcv.* = .{ .register_mask = .{ .reg = reg, .info = info } }; } fn toLea(temp: *Temp, cg: *CodeGen) !bool { @@ -21335,9 +22367,25 @@ const Temp = struct { } } + fn toMemory(temp: *Temp, cg: *CodeGen) !bool { + const temp_tracking = temp.tracking(cg); + if (temp_tracking.short.isMemory()) return false; + const new_temp_index = cg.next_temp_index; + const ty = temp.typeOf(cg); + cg.temp_type[@intFromEnum(new_temp_index)] = ty; + const new_frame_index = try cg.allocFrameIndex(.initSpill(ty, cg.pt.zcu)); + try cg.genSetMem(.{ .frame = new_frame_index }, 0, ty, temp_tracking.short, .{}); + new_temp_index.tracking(cg).* = .init(.{ .load_frame = .{ .index = new_frame_index } }); + try temp.die(cg); + 
cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); + temp.* = .{ .index = new_temp_index.toIndex() }; + return true; + } + fn toBase(temp: *Temp, cg: *CodeGen) !bool { const temp_tracking = temp.tracking(cg); if (temp_tracking.short.isBase()) return false; + if (try temp.toMemory(cg)) return true; const new_temp_index = cg.next_temp_index; cg.temp_type[@intFromEnum(new_temp_index)] = temp.typeOf(cg); const new_reg = @@ -21561,20 +22609,20 @@ fn tempAlloc(cg: *CodeGen, ty: Type) !Temp { return .{ .index = temp_index.toIndex() }; } -fn tempAllocReg(cg: *CodeGen, ty: Type, rc: RegisterManager.RegisterBitSet) !Temp { +fn tempAllocReg(cg: *CodeGen, ty: Type, rs: RegisterManager.RegisterBitSet) !Temp { const temp_index = cg.next_temp_index; temp_index.tracking(cg).* = .init( - .{ .register = try cg.register_manager.allocReg(temp_index.toIndex(), rc) }, + .{ .register = try cg.register_manager.allocReg(temp_index.toIndex(), rs) }, ); cg.temp_type[@intFromEnum(temp_index)] = ty; cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); return .{ .index = temp_index.toIndex() }; } -fn tempAllocRegPair(cg: *CodeGen, ty: Type, rc: RegisterManager.RegisterBitSet) !Temp { +fn tempAllocRegPair(cg: *CodeGen, ty: Type, rs: RegisterManager.RegisterBitSet) !Temp { const temp_index = cg.next_temp_index; temp_index.tracking(cg).* = .init( - .{ .register_pair = try cg.register_manager.allocRegs(2, temp_index.toIndex(), rc) }, + .{ .register_pair = try cg.register_manager.allocRegs(2, temp_index.toIndex(), rs) }, ); cg.temp_type[@intFromEnum(temp_index)] = ty; cg.next_temp_index = @enumFromInt(@intFromEnum(temp_index) + 1); @@ -21696,18 +22744,21 @@ const Pattern = struct { once, /// execute the instruction on all groups of non-overlapping bits in the entire value bitwise, - /// for each element, execute the instruction on each limb, propogating the carry flag + /// for each element, execute the instruction on each limb, propagating the carry flag limbwise_carry, + /// for each element, execute the instruction on each limb, propagating a register + limbwise_reduce, /// for each element, execute the instruction on pairs of limbs, starting from the - /// least significant, propogating a limb + /// least significant, propagating a limb limbwise_pairs_forward, /// for each element, execute the instruction on pairs of limbs, starting from the - /// most significant, propogating a limb + /// most significant, propagating a limb limbwise_pairs_reverse, /// for each element, execute the instruction elementwise, } = .once, mir_tag: Mir.Inst.FixedTag, + final_mir_tag: ?Mir.Inst.FixedTag = null, patterns: []const Pattern, }; @@ -21740,7 +22791,7 @@ const Pattern = struct { ymm_sign_mask, /// any memory mem, - /// a limb stored in a gpr + /// a limb stored in a general purpose register gpr_limb, /// a limb stored in a 64-bit mmx register mm_limb, @@ -21750,8 +22801,16 @@ const Pattern = struct { ymm_limb, /// a limb stored in memory mem_limb, - /// a limb stored in a condition code - cc_mask_limb, + /// a mutable limb stored in a general purpose register + mut_gpr_limb, + /// a mutable limb stored in memory + mut_mem_limb, + /// an element stored in a condition code + cc_elem, + /// an element stored in a general purpose register + gpr_elem, + /// an element stored in memory + mem_elem, /// a limb stored in a 64-bit mmx register mask mm_mask_limb, /// a limb stored in a 128-bit sse register masuk @@ -21773,7 +22832,7 @@ const Pattern = struct { fn matches(op: Op, is_mut: bool, temp: Temp, cg: 
*CodeGen) bool { switch (op) { - .implicit, .explicit, .cc, .cc_mask_limb => unreachable, + .implicit, .explicit, .cc, .cc_elem => unreachable, else => {}, // temp is undefined .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => return true, @@ -21781,7 +22840,7 @@ const Pattern = struct { const temp_ty = temp.typeOf(cg); const abi_size = temp_ty.abiSize(cg.pt.zcu); return switch (op) { - .implicit, .explicit, .cc, .cc_mask_limb, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, + .implicit, .explicit, .cc, .cc_elem, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, .gpr => abi_size <= 8 and switch (temp.tracking(cg).short) { .register => |reg| reg.class() == .general_purpose, .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and @@ -21803,8 +22862,8 @@ const Pattern = struct { .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, else => cg.regClassForType(temp_ty) == .sse, }, - .mem, .mem_limb => (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), - .gpr_limb => abi_size > 8 and switch (temp.tracking(cg).short) { + .mem, .mem_limb, .mut_mem_limb, .mem_elem => (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), + .gpr_limb, .mut_gpr_limb, .gpr_elem => abi_size > 8 and switch (temp.tracking(cg).short) { .register, .register_pair, .register_triple, .register_quadruple => true, else => |mcv| mcv.isMemory(), }, @@ -21837,6 +22896,20 @@ const Pattern = struct { }; } }; + + const Instruction = struct { + mir_tag: Mir.Inst.FixedTag, + operands: [4]Instruction.Operand, + + const Operand = union(enum) { + regb: u8, + regw: u8, + ereg: u8, + rreg: u8, + xmm: u8, + ymm: u8, + }; + }; }; const SelectOptions = struct { cc: ?Condition = null, @@ -21932,6 +23005,7 @@ fn select( for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { const ref_src_op, const is_mut = switch (src_op) { .implicit, .explicit => |linked_index| .{ pattern.ops[linked_index], true }, + .mut_mem_limb, .mut_gpr_limb => .{ src_op, true }, else => .{ src_op, false }, }; if (!ref_src_op.matches(is_mut, src_temp, cg)) continue :patterns; @@ -21939,7 +23013,7 @@ fn select( for (pattern.ops) |op| switch (op) { else => {}, - .cc_mask_limb, + .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb, @@ -21953,16 +23027,16 @@ fn select( .implicit, .explicit => |linked_index| pattern.ops[linked_index], else => src_op, }) { - .implicit, .explicit, .cc, .cc_mask_limb => unreachable, - .gpr => try src_temp.toRegClass(.general_purpose, cg), - .mm, .mm_mask, .mm_sign_mask => try src_temp.toRegClass(.mmx, cg), + .implicit, .explicit, .cc, .cc_elem => unreachable, + .gpr => try src_temp.toRegClass(true, .general_purpose, cg), + .mm, .mm_mask, .mm_sign_mask => try src_temp.toRegClass(true, .mmx, cg), .xmm, .ymm, .xmm_mask, .ymm_mask, .xmm_sign_mask, .ymm_sign_mask, - => try src_temp.toRegClass(.sse, cg), + => try src_temp.toRegClass(true, .sse, cg), .mem => try src_temp.toBase(cg), .imm, .simm32 => false, .gpr_limb, @@ -21970,6 +23044,10 @@ fn select( .xmm_limb, .ymm_limb, .mem_limb, + .mut_gpr_limb, + .mut_mem_limb, + .gpr_elem, + .mem_elem, => switch (src_temp.tracking(cg).short) { .register, .register_pair, .register_triple, .register_quadruple => false, else => try src_temp.toBase(cg), @@ -21984,7 +23062,7 @@ fn select( var mir_ops_len: usize = 0; for (pattern.ops[0..dst_temps.len]) |dst_op| switch (dst_op) { else => mir_ops_len += 1, - .cc, .cc_mask_limb => {}, + .cc, .cc_elem => {}, }; const dst_mir_ops_len = mir_ops_len; for (src_temps, 
pattern.ops[dst_temps.len..]) |src_temp, src_op| { @@ -22001,11 +23079,11 @@ fn select( }; const limb_size: u8, const rc = switch (linked_src_op) { else => continue, - .gpr_limb => .{ @intCast(@divExact(@as(Memory.Size, switch (pattern_set.scalar) { + .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ @intCast(@divExact(Memory.Size.bitSize(switch (pattern_set.scalar) { .any => .qword, .bool => unreachable, .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, - }).bitSize(), 8)), abi.RegisterClass.gp }, + }), 8)), abi.RegisterClass.gp }, .mm_limb, .mm_mask_limb => .{ 8, @panic("TODO") }, .xmm_limb, .xmm_mask_limb => .{ 16, abi.RegisterClass.sse }, .ymm_limb, .ymm_mask_limb => .{ 32, abi.RegisterClass.sse }, @@ -22030,7 +23108,10 @@ fn select( .unused, .temp => loop.limb_offset = .{ .known = 0 }, .known => {}, } - if (!rc.isSet(RegisterManager.indexOfRegIntoTracked(src_mcv.getRegs()[0]).?)) { + if (switch (linked_src_op) { + .mut_gpr_limb => true, + else => !rc.isSet(RegisterManager.indexOfRegIntoTracked(src_mcv.getRegs()[0]).?), + }) { if (loop.shuffle_temp == null) loop.shuffle_temp = try cg.tempAllocReg(.noreturn, abi.RegisterClass.sse); assert(extra_temp.* == null); extra_temp.* = try cg.tempAllocReg(.usize, rc); @@ -22066,25 +23147,26 @@ fn select( .xmm, .xmm_mask, .xmm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse), .ymm, .ymm_mask, .ymm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse), .mem => @panic("TODO"), - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => { + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mut_gpr_limb, .gpr_elem => { if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(.noreturn, switch (dst_op) { else => unreachable, - .gpr_limb => abi.RegisterClass.gp, + .gpr_limb, .mut_gpr_limb, .gpr_elem => abi.RegisterClass.gp, .mm_limb => @panic("TODO"), .xmm_limb, .ymm_limb => abi.RegisterClass.sse, }); break :dst_temp try cg.tempAlloc(dst_ty); }, - .mem_limb => try cg.tempAlloc(dst_ty), - .cc_mask_limb, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => unreachable, // already checked + .mem_limb, .mut_mem_limb, .mem_elem => try cg.tempAlloc(dst_ty), + .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => unreachable, // already checked .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination }; }, - .cc_mask_limb, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { - const scalar_size = @divExact(switch (pattern_set.scalar) { - .any, .bool => unreachable, + .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { + const scalar_size = @divExact(Memory.Size.bitSize(switch (pattern_set.scalar) { + .any => .qword, + .bool => unreachable, .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, - }.bitSize(), 8); + }), 8); const mask_bit_size = @divExact(loop.remaining_size.?, scalar_size); const mask_limb_bit_size: u7 = @intCast(@divExact(loop.limb_size.?, scalar_size)); assert(loop.mask_limb_bit_size == null or loop.mask_limb_bit_size == mask_limb_bit_size); @@ -22104,7 +23186,7 @@ fn select( loop.mask_limb_offset = loop.limb_offset; if (loop.mask_limb_temp == null) { loop.mask_limb_temp = try cg.tempAllocReg(.usize, abi.RegisterClass.gp); - if (dst_op == .cc_mask_limb and mask_store_bit_size > 8) { + if (dst_op == .cc_elem and mask_store_bit_size > 8) { // setcc only clears 8 bits const mask_limb_alias = loop.mask_limb_temp.?.tracking(cg).short.register.to32(); try cg.spillEflagsIfOccupied(); @@ -22195,6 +23277,8 @@ fn select( .mm_limb, .xmm_limb, 
.ymm_limb, + .mut_gpr_limb, + .gpr_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb, @@ -22304,7 +23388,7 @@ fn select( else => try cg.asmRegisterMemory( switch (linked_src_op) { else => unreachable, - .gpr_limb => .{ ._, .mov }, + .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ ._, .mov }, .mm_limb, .mm_mask_limb => .{ ._q, .mov }, .xmm_limb, .ymm_limb, @@ -22328,7 +23412,7 @@ fn select( }, } mir_op.* = switch (linked_src_op) { - .implicit, .explicit, .cc, .cc_mask_limb => unreachable, + .implicit, .explicit, .cc, .cc_elem => unreachable, .gpr => .{ .reg = registerAlias( src_mcv.register, @intCast(src_temp.typeOf(cg).abiSize(cg.pt.zcu)), @@ -22342,6 +23426,8 @@ fn select( .mm_limb, .xmm_limb, .ymm_limb, + .mut_gpr_limb, + .gpr_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb, @@ -22349,7 +23435,7 @@ fn select( limb_temp.tracking(cg).short.register else src_mcv.getRegs()[@divExact(loop.limb_offset.known, loop.limb_size.?)], loop.limb_size.?) }, - .mem_limb => .{ .mem = switch (src_mcv) { + .mem_limb, .mut_mem_limb, .mem_elem => .{ .mem = switch (src_mcv) { .register, .register_pair, .register_triple, .register_quadruple => unreachable, else => switch (loop.limb_offset) { .unused => unreachable, @@ -22394,7 +23480,7 @@ fn select( ) |*mir_op, dst_op, dst_temp, dst_ty, extra_temp| { if (mir_op.* != .none) continue; mir_op.* = switch (dst_op) { - .implicit, .cc, .cc_mask_limb => unreachable, + .implicit, .cc, .cc_elem => unreachable, .explicit => |linked_index| mir_ops[linked_index], .gpr => .{ .reg = registerAlias( dst_temp.tracking(cg).short.register, @@ -22404,18 +23490,18 @@ fn select( .xmm, .xmm_mask, .xmm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to128() }, .ymm, .ymm_mask, .ymm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to256() }, .mem => @panic("TODO"), - .gpr_limb => .{ .reg = registerAlias( + .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ .reg = registerAlias( extra_temp.?.tracking(cg).short.register, - @intCast(@divExact(@as(Memory.Size, switch (pattern_set.scalar) { + @intCast(@divExact(Memory.Size.bitSize(switch (pattern_set.scalar) { .any => .qword, .bool => unreachable, .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, - }).bitSize(), 8)), + }), 8)), ) }, .mm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register }, .xmm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() }, .ymm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() }, - .mem_limb => .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) { + .mem_limb, .mut_mem_limb, .mem_elem => .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) { .unused => unreachable, .known => |limb_offset| .{ .size = .fromSize(loop.limb_size.?), @@ -22434,7 +23520,11 @@ fn select( } std.mem.swap(Operand, &mir_ops[pattern.commute[0]], &mir_ops[pattern.commute[1]]); if (pattern_set.clobbers.eflags) try cg.spillEflagsIfOccupied(); - cg.asmOps(pattern_set.mir_tag, mir_ops) catch |err| switch (err) { + cg.asmOps((if (loop.remaining_size != null and loop.limb_size != null and + loop.remaining_size.? <= loop.limb_size.?) 
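+                    // on the final limb (remaining_size <= limb_size), prefer the pattern set's
+                    // final_mir_tag when one is provided; otherwise fall back to mir_tag below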
+ pattern_set.final_mir_tag + else + null) orelse pattern_set.mir_tag, mir_ops) catch |err| switch (err) { error.InvalidInstruction => { const fixes = @tagName(pattern_set.mir_tag[0]); const fixes_blank = std.mem.indexOfScalar(u8, fixes, '_').?; @@ -22478,15 +23568,19 @@ fn select( .mm_sign_mask, .xmm_sign_mask, .ymm_sign_mask, - => dst_temp.asMask(switch (dst_op) { - else => unreachable, - .mm_mask, .xmm_mask, .ymm_mask => .all, - .mm_sign_mask, .xmm_sign_mask, .ymm_sign_mask => .sign, - }, invert_result, switch (pattern_set.scalar) { - .any, .bool => unreachable, - .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, + => dst_temp.asMask(.{ + .kind = switch (dst_op) { + else => unreachable, + .mm_mask, .xmm_mask, .ymm_mask => .all, + .mm_sign_mask, .xmm_sign_mask, .ymm_sign_mask => .sign, + }, + .inverted = invert_result, + .scalar = switch (pattern_set.scalar) { + .any, .bool => unreachable, + .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, + }, }, cg), - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb => if (extra_temp) |limb_temp| { + .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mut_gpr_limb, .gpr_elem => if (extra_temp) |limb_temp| { const dst_mcv = dst_temp.tracking(cg).short; switch (dst_mcv) { .register_pair, .register_triple, .register_quadruple => try cg.asmRegisterRegister( @@ -22497,7 +23591,7 @@ fn select( else => try cg.asmMemoryRegister( switch (dst_op) { else => unreachable, - .gpr_limb => .{ ._, .mov }, + .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ ._, .mov }, .mm_limb => .{ ._q, .mov }, .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, }, @@ -22516,9 +23610,10 @@ fn select( ), } }, - .cc_mask_limb, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { + .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { const scalar_size = switch (pattern_set.scalar) { - .any, .bool => unreachable, + .any => .qword, + .bool => unreachable, .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, }; const mask_store_size: u4 = @@ -22534,7 +23629,7 @@ fn select( loop.mask_store_reg.?, mask_store_size); switch (dst_op) { else => unreachable, - .cc_mask_limb => try cg.asmSetccRegister(switch (invert_result) { + .cc_elem => try cg.asmSetccRegister(switch (invert_result) { false => opts.cc.?, true => opts.cc.?.negate(), }, mask_limb_reg.to8()), @@ -22678,6 +23773,7 @@ fn select( .once => break :pattern_sets, .bitwise => {}, .limbwise_carry => @panic("TODO"), + .limbwise_reduce => @panic("TODO"), .limbwise_pairs_forward => @panic("TODO"), .limbwise_pairs_reverse => @panic("TODO"), .elementwise => {}, @@ -22743,9 +23839,433 @@ fn select( } } } else { - log.err("failed to select:", .{}); + log.err("failed to select {s}:", .{@tagName(pattern_sets[0].mir_tag[1])}); for (src_temps) |src_temp| log.err("{}", .{src_temp.tracking(cg)}); - return cg.fail("failed to select", .{}); + return error.SelectFailed; } for (extra_temps) |extra_temp| if (extra_temp) |temp| try temp.die(cg); } + +const Select2 = struct { + cg: *CodeGen, + case: *const Case, + pattern: *const Select2.Pattern, + extra_temps: [3]Temp, + dst_temps: []const Temp, + src_temps: []const Temp, + commute: struct { u8, u8 }, + limb: Memory.Mod.Rm, + + fn emit(s: Select2, inst: Instruction) !void { + const mir_tag: Mir.Inst.FixedTag = .{ inst[0], inst[1] }; + var mir_ops: [4]CodeGen.Operand = undefined; + inline for (&mir_ops, 2..) 
|*mir_op, inst_index| mir_op.* = try inst[inst_index].lower(s); + s.cg.asmOps(mir_tag, mir_ops) catch |err| switch (err) { + error.InvalidInstruction => { + const fixes = @tagName(mir_tag[0]); + const fixes_blank = std.mem.indexOfScalar(u8, fixes, '_').?; + return s.cg.fail( + "invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'", + .{ + fixes[0..fixes_blank], + @tagName(mir_tag[1]), + fixes[fixes_blank + 1 ..], + @tagName(mir_ops[0]), + @tagName(mir_ops[1]), + @tagName(mir_ops[2]), + @tagName(mir_ops[3]), + }, + ); + }, + else => |e| return e, + }; + } + + fn lowerLimb(s: Select2, temp: Temp) !CodeGen.Operand { + return .{ .mem = try temp.tracking(s.cg).short.mem(s.cg, s.limb) }; + } + + fn srcTemp(s: Select2, index: u8) Temp { + return s.src_temps[ + if (index == s.commute[0]) + s.commute[1] + else if (index == s.commute[1]) + s.commute[0] + else + index + ]; + } + + const Case = struct { + required_features: [2]?std.Target.x86.Feature = @splat(null), + constraints: [2]Constraint = @splat(.any), + patterns: []const Select2.Pattern, + clobbers: struct { eflags: bool = false } = .{}, + extra_temps: [3]TempSpec = @splat(.unused), + dst_temps: [1]TempSpec.Kind = @splat(.unused), + each: union(enum) { + once: []const Instruction, + limb: struct { + of: Select2.Operand, + header: []const Instruction = &.{}, + first: ?[]const Instruction = null, + body: []const Instruction, + last: ?[]const Instruction = null, + trailer: []const Instruction = &.{}, + }, + }, + }; + + const Constraint = union(enum) { + any, + any_int, + any_float, + bool_vec: Memory.Size, + int: Memory.Size, + signed_int: Memory.Size, + unsigned_int: Memory.Size, + + fn accepts(constraint: Constraint, temp: Temp, cg: *CodeGen) bool { + const zcu = cg.pt.zcu; + switch (constraint) { + .any => return true, + .any_int => { + const scalar_ty = temp.typeOf(cg).scalarType(zcu); + return scalar_ty.isAbiInt(zcu) or scalar_ty.isPtrAtRuntime(zcu); + }, + .any_float => return temp.typeOf(cg).scalarType(zcu).isRuntimeFloat(), + .bool_vec => |size| { + const ty = temp.typeOf(cg); + return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and + ty.vectorLen(zcu) <= size.bitSize(); + }, + .int => |size| { + const scalar_ty = temp.typeOf(cg).scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(); + return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).bits <= size.bitSize(); + }, + .signed_int => |size| { + const scalar_ty = temp.typeOf(cg).scalarType(zcu); + if (!scalar_ty.isAbiInt(zcu)) return false; + const info = scalar_ty.intInfo(zcu); + return info.signedness == .signed and info.bits <= size.bitSize(); + }, + .unsigned_int => |size| { + const scalar_ty = temp.typeOf(cg).scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(); + if (!scalar_ty.isAbiInt(zcu)) return false; + const info = scalar_ty.intInfo(zcu); + return info.signedness == .unsigned and info.bits <= size.bitSize(); + }, + } + } + }; + + const Pattern = struct { + src: [2]Src, + commute: struct { u8, u8 } = .{ 0, 0 }, + + const Src = enum { + none, + any, + imm8, + imm16, + imm32, + simm32, + mem, + mut_mem, + to_mem, + gpr, + mut_gpr, + mm, + mut_mm, + xmm, + mut_xmm, + ymm, + mut_ymm, + + fn matches(src: Src, temp: Temp, cg: *CodeGen) bool { + switch (src) { + .none => unreachable, + .any => return true, + .imm8 => return switch (temp.tracking(cg).short) { + .immediate => |imm| std.math.cast(u8, imm) != null, + else => false, + }, + .imm16 => return switch 
(temp.tracking(cg).short) { + .immediate => |imm| std.math.cast(u16, imm) != null, + else => false, + }, + .imm32 => return switch (temp.tracking(cg).short) { + .immediate => |imm| std.math.cast(u32, imm) != null, + else => false, + }, + .simm32 => return switch (temp.tracking(cg).short) { + .immediate => |imm| std.math.cast(i32, @as(i64, @bitCast(imm))) != null, + else => false, + }, + .mem => return temp.tracking(cg).short.isMemory(), + .mut_mem => return temp.isMut(cg) and temp.tracking(cg).short.isMemory(), + .to_mem => return true, + .gpr, .mut_gpr => { + const mcv = temp.tracking(cg).short; + const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); + return abi_size <= 8 and switch (mcv) { + .register => |reg| reg.class() == .general_purpose, + .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and + reg_off.off == 0, + .register_pair, .register_triple, .register_quadruple => false, + else => true, + }; + }, + .mm, .mut_mm => { + const mcv = temp.tracking(cg).short; + const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); + return abi_size <= 8 and switch (mcv) { + .register => |reg| reg.class() == .mmx, + .register_offset => |reg_off| reg_off.reg.class() == .mmx and + reg_off.off == 0, + else => false, + }; + }, + .xmm, .mut_xmm => { + const mcv = temp.tracking(cg).short; + const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); + return abi_size > 8 and abi_size <= 16 and switch (mcv) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and + reg_off.off == 0, + .register_pair, .register_triple, .register_quadruple => false, + else => true, + }; + }, + .ymm, .mut_ymm => { + const mcv = temp.tracking(cg).short; + const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); + return abi_size > 16 and abi_size <= 32 and switch (mcv) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and + reg_off.off == 0, + .register_pair, .register_triple, .register_quadruple => false, + else => true, + }; + }, + } + } + + fn convert(src: Src, temp: *Temp, cg: *CodeGen) !bool { + return switch (src) { + .none => unreachable, + .any, .imm8, .imm16, .imm32, .simm32 => false, + .mem, .mut_mem, .to_mem => try temp.toBase(cg), + .gpr => try temp.toRegClass(false, .general_purpose, cg), + .mut_gpr => try temp.toRegClass(true, .general_purpose, cg), + .mm => try temp.toRegClass(false, .mmx, cg), + .mut_mm => try temp.toRegClass(true, .mmx, cg), + .xmm, .ymm => try temp.toRegClass(false, .sse, cg), + .mut_xmm, .mut_ymm => try temp.toRegClass(true, .sse, cg), + }; + } + }; + }; + + const TempSpec = struct { + type: Type = .noreturn, + kind: Kind, + + const unused: TempSpec = .{ .kind = .unused }; + + const Kind = union(enum) { + unused, + any, + cc: Condition, + reg: Register, + rc: Register.Class, + rc_mask: struct { rc: Register.Class, info: MaskInfo }, + mem, + src: u8, + src_mask: struct { src: u8, info: MaskInfo }, + + fn finish(kind: Kind, temp: Temp, s: Select2) void { + switch (kind) { + else => {}, + inline .rc_mask, .src_mask => |mask| temp.asMask(mask.info, s.cg), + } + } + }; + + fn create(spec: TempSpec, s: Select2) !?Temp { + return switch (spec.kind) { + .unused => null, + .any => try s.cg.tempAlloc(spec.type), + .cc => |cc| try s.cg.tempFromValue(spec.type, .{ .eflags = cc }), + .reg => |reg| try s.cg.tempFromValue(spec.type, .{ .register = reg }), + .rc => |rc| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc)), + .rc_mask => |mask| try s.cg.tempAllocReg(spec.type, 
regSetForRegClass(mask.rc)), + .mem => try s.cg.tempAllocMem(spec.type), + .src => |src| s.srcTemp(src), + .src_mask => |mask| s.srcTemp(mask.src), + }; + } + }; + + const Instruction = struct { + Mir.Inst.Fixes, + Mir.Inst.Tag, + Select2.Operand, + Select2.Operand, + Select2.Operand, + Select2.Operand, + }; + const Operand = union(enum) { + none, + extra: struct { Memory.Size, u8 }, + dst: struct { Memory.Size, u8 }, + src: struct { Memory.Size, u8 }, + dst_limb: u8, + src_limb: u8, + simm32: i32, + + const tmp0b: Select2.Operand = .{ .extra = .{ .byte, 0 } }; + const tmp0w: Select2.Operand = .{ .extra = .{ .word, 0 } }; + const etmp0: Select2.Operand = .{ .extra = .{ .dword, 0 } }; + const rtmp0: Select2.Operand = .{ .extra = .{ .qword, 0 } }; + const xtmp0: Select2.Operand = .{ .extra = .{ .xword, 0 } }; + const ytmp0: Select2.Operand = .{ .extra = .{ .yword, 0 } }; + + const tmp1b: Select2.Operand = .{ .extra = .{ .byte, 1 } }; + const tmp1w: Select2.Operand = .{ .extra = .{ .word, 1 } }; + const etmp1: Select2.Operand = .{ .extra = .{ .dword, 1 } }; + const rtmp1: Select2.Operand = .{ .extra = .{ .qword, 1 } }; + const xtmp1: Select2.Operand = .{ .extra = .{ .xword, 1 } }; + const ytmp1: Select2.Operand = .{ .extra = .{ .yword, 1 } }; + + const tmp2b: Select2.Operand = .{ .extra = .{ .byte, 2 } }; + const tmp2w: Select2.Operand = .{ .extra = .{ .word, 2 } }; + const etmp2: Select2.Operand = .{ .extra = .{ .dword, 2 } }; + const rtmp2: Select2.Operand = .{ .extra = .{ .qword, 2 } }; + const xtmp2: Select2.Operand = .{ .extra = .{ .xword, 2 } }; + const ytmp2: Select2.Operand = .{ .extra = .{ .yword, 2 } }; + + const dst0b: Select2.Operand = .{ .dst = .{ .byte, 0 } }; + const dst0w: Select2.Operand = .{ .dst = .{ .word, 0 } }; + const edst0: Select2.Operand = .{ .dst = .{ .dword, 0 } }; + const rdst0: Select2.Operand = .{ .dst = .{ .qword, 0 } }; + const xdst0: Select2.Operand = .{ .dst = .{ .xword, 0 } }; + const ydst0: Select2.Operand = .{ .dst = .{ .yword, 0 } }; + + const src0b: Select2.Operand = .{ .src = .{ .byte, 0 } }; + const src0w: Select2.Operand = .{ .src = .{ .word, 0 } }; + const esrc0: Select2.Operand = .{ .src = .{ .dword, 0 } }; + const rsrc0: Select2.Operand = .{ .src = .{ .qword, 0 } }; + const xsrc0: Select2.Operand = .{ .src = .{ .xword, 0 } }; + const ysrc0: Select2.Operand = .{ .src = .{ .yword, 0 } }; + + const src1b: Select2.Operand = .{ .src = .{ .byte, 1 } }; + const src1w: Select2.Operand = .{ .src = .{ .word, 1 } }; + const esrc1: Select2.Operand = .{ .src = .{ .dword, 1 } }; + const rsrc1: Select2.Operand = .{ .src = .{ .qword, 1 } }; + const xsrc1: Select2.Operand = .{ .src = .{ .xword, 1 } }; + const ysrc1: Select2.Operand = .{ .src = .{ .yword, 1 } }; + + fn unwrap(op: Select2.Operand, s: Select2) struct { Memory.Size, Temp } { + return switch (op) { + else => unreachable, + .extra => |extra| .{ extra[0], s.extra_temps[extra[1]] }, + .dst => |dst| .{ dst[0], s.dst_temps[dst[1]] }, + .src => |src| .{ src[0], s.srcTemp(src[1]) }, + }; + } + + fn lower(op: Select2.Operand, s: Select2) !CodeGen.Operand { + switch (op) { + .none => return .none, + else => {}, + .dst_limb => |dst| return s.lowerLimb(s.dst_temps[dst]), + .src_limb => |src| return s.lowerLimb(s.srcTemp(src)), + .simm32 => |imm| return .{ .imm = .s(imm) }, + } + const size, const temp = op.unwrap(s); + return switch (temp.tracking(s.cg).short) { + .immediate => |imm| .{ .imm = switch (size) { + .byte => if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u8, 
@intCast(imm))), + .word => if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u16, @intCast(imm))), + .dword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u32, @intCast(imm))), + .qword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(imm), + else => unreachable, + } }, + else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = size }) }, + .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(size.bitSize(), 8))) }, + }; + } + }; +}; +fn select2( + cg: *CodeGen, + dst_temps: []Temp, + dst_tys: []const Type, + src_temps: []Temp, + cases: []const Select2.Case, +) !void { + cases: for (cases) |*case| { + for (case.required_features) |required_feature| if (required_feature) |feature| if (!switch (feature) { + .@"64bit" => cg.target.cpu.arch == .x86_64, + .mmx => false, + else => cg.hasFeature(feature), + }) continue :cases; + for (case.constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp, cg)) continue :cases; + patterns: for (case.patterns) |*pattern| { + for (pattern.src, src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns; + + var s: Select2 = .{ + .cg = cg, + .case = case, + .pattern = pattern, + .extra_temps = undefined, + .dst_temps = dst_temps, + .src_temps = src_temps, + .commute = pattern.commute, + .limb = undefined, + }; + for (&s.extra_temps, case.extra_temps) |*temp, spec| temp.* = try spec.create(s) orelse continue; + + while (true) for (pattern.src, src_temps) |src_pattern, *src_temp| { + if (try src_pattern.convert(src_temp, cg)) break; + } else break; + + if (case.clobbers.eflags or case.each != .once) try cg.spillEflagsIfOccupied(); + + for (dst_temps, dst_tys, case.dst_temps[0..dst_temps.len]) |*dst_temp, dst_ty, dst_kind| + dst_temp.* = (try Select2.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, s)).?; + + switch (case.each) { + .once => |body| for (body) |inst| try s.emit(inst), + .limb => |limb| { + const limb_size, const limb_of_temp = limb.of.unwrap(s); + const limb_of_size: u31 = @intCast(limb_of_temp.typeOf(cg).abiSize(cg.pt.zcu)); + s.limb = .{ + .size = limb_size, + .index = s.extra_temps[0].tracking(cg).short.register.to64(), + .disp = limb_of_size, + }; + for (limb.header) |inst| try s.emit(inst); + try cg.asmRegisterImmediate(.{ ._, .mov }, s.limb.index, .s(-@as(i32, limb_of_size))); + const limb_loop_reloc: u32 = @intCast(cg.mir_instructions.len); + for (limb.body) |inst| try s.emit(inst); + try cg.asmRegisterImmediate( + .{ ._, .add }, + s.limb.index, + .s(@intCast(@divExact(limb_size.bitSize(), 8))), + ); + _ = try cg.asmJccReloc(.nc, limb_loop_reloc); + for (limb.trailer) |inst| try s.emit(inst); + }, + } + + for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, dst_kind| dst_kind.finish(dst_temp, s); + for (case.extra_temps, s.extra_temps) |spec, temp| if (spec.kind != .unused) try temp.die(cg); + return; + } + } + return error.Select2Failed; +} diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index a172a948d2..4be9acf334 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -353,6 +353,7 @@ pub const Mnemonic = enum { pmovsxbd, pmovsxbq, pmovsxbw, pmovsxdq, pmovsxwd, pmovsxwq, pmovzxbd, pmovzxbq, pmovzxbw, pmovzxdq, pmovzxwd, pmovzxwq, pmulld, + ptest, roundpd, roundps, roundsd, roundss, // SSE4.2 pcmpgtq, @@ -413,6 +414,7 @@ pub const Mnemonic = enum { vpsrad, vpsraq, vpsraw, vpsrld, vpsrldq, vpsrlq, 
vpsrlw, vpsubb, vpsubd, vpsubq, vpsubsb, vpsubsw, vpsubusb, vpsubusw, vpsubw, + vptest, vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, vpxor, @@ -421,6 +423,7 @@ pub const Mnemonic = enum { vsqrtpd, vsqrtps, vsqrtsd, vsqrtss, vstmxcsr, vsubpd, vsubps, vsubsd, vsubss, + vtestpd, vtestps, vxorpd, vxorps, // F16C vcvtph2ps, vcvtps2ph, diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 19881e0dee..032a57a3d0 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -150,6 +150,31 @@ pub const Condition = enum(u5) { .nz_or_p => .z_and_np, }; } + + /// Returns the equivalent condition when the operands are swapped. + pub fn commute(cond: Condition) Condition { + return switch (cond) { + else => cond, + .a => .b, + .ae => .be, + .b => .a, + .be => .ae, + .c => .a, + .g => .l, + .ge => .le, + .l => .g, + .le => .ge, + .na => .nb, + .nae => .nbe, + .nb => .na, + .nbe => .nae, + .nc => .na, + .ng => .nl, + .nge => .nle, + .nl => .ng, + .nle => .nge, + }; + } }; pub const Register = enum(u7) { diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index a204aa017f..a3a82cf4e2 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1251,6 +1251,8 @@ pub const table = [_]Entry{ .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 }, + .{ .ptest, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x17 }, 0, .none, .sse4_1 }, + .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 }, .{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 }, @@ -1676,6 +1678,9 @@ pub const table = [_]Entry{ .{ .vpsubusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_128_wig, .avx }, .{ .vpsubusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_128_wig, .avx }, + .{ .vptest, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x17 }, 0, .vex_128_wig, .avx }, + .{ .vptest, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x17 }, 0, .vex_256_wig, .avx }, + .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx }, .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx }, .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx }, @@ -1726,6 +1731,11 @@ pub const table = [_]Entry{ .{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx }, + .{ .vtestps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x0e }, 0, .vex_128_w0, .avx }, + .{ .vtestps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x0e }, 0, .vex_256_w0, .avx }, + .{ .vtestpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x0f }, 0, .vex_128_w0, .avx }, + .{ .vtestpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x0f }, 0, .vex_256_w0, .avx }, + .{ .vxorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_128_wig, .avx }, .{ .vxorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_256_wig, .avx }, diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index c69917fc05..031f1d9e1b 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -263,6 +263,12 @@ fn testBinary(comptime op: anytype) !void { 0xbfd88aee1d82ed32, 0x20e91c15b701059a, 0xed533d18f8657f3f, 0x1ddd7cd7f6bab957, }); + + if (false) try testType(@Vector(1, u128), .{ + 
0x5f11e16b0ca3392f907a857881455d2e, + }, .{ + 0xf9142d73b408fd6955922f9fc147f7d7, + }); } inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) { From a1828ebcda0f684cdd146fa3ab5b86701909ca12 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 23 Dec 2024 15:53:13 -0500 Subject: [PATCH 07/25] x86_64: demolish the old --- src/arch/x86_64/CodeGen.zig | 4482 +++++++++++++++++++-------------- src/arch/x86_64/Encoding.zig | 29 +- src/arch/x86_64/Lower.zig | 33 +- src/arch/x86_64/Mir.zig | 14 +- src/arch/x86_64/bits.zig | 17 +- src/arch/x86_64/encoder.zig | 33 +- src/link/Elf/Atom.zig | 36 +- test/behavior/x86_64/math.zig | 39 +- 8 files changed, 2677 insertions(+), 2006 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index c8d41dfc4e..4d7c51ffd0 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -465,7 +465,7 @@ pub const MCValue = union(enum) { } }, } else .{ .base = .{ .reg = .ds }, .mod = .{ .off = addr } }, .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, + .base = .{ .reg = registerAlias(reg_off.reg, @divExact(function.target.ptrBitWidth(), 8)) }, .mod = .{ .rm = .{ .size = mod_rm.size, .index = mod_rm.index, @@ -986,6 +986,7 @@ pub fn generate( .air = function.air, .lower = .{ .bin_file = bin_file, + .target = function.target, .allocator = gpa, .mir = mir, .cc = cc, @@ -1074,6 +1075,7 @@ pub fn generateLazy( .air = function.air, .lower = .{ .bin_file = bin_file, + .target = function.target, .allocator = gpa, .mir = mir, .cc = abi.resolveCallingConvention(.auto, function.target.*), @@ -1154,6 +1156,7 @@ fn formatWipMir( const mod = comp.root_mod; var lower: Lower = .{ .bin_file = data.self.bin_file, + .target = data.self.target, .allocator = data.self.gpa, .mir = .{ .instructions = data.self.mir_instructions.slice(), @@ -2514,7 +2517,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const bin_op = air_datas[@intFromEnum(inst)].bin_op; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) { + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) { else => unreachable, .bit_and => .@"and", .bit_or => .@"or", @@ -2530,7 +2533,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ - .{ .vp_, mir_tag, .ydst0, .ysrc0, .ysrc1, .none }, + .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ .required_features = .{ .avx, null }, @@ -2541,7 +2544,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ - .{ .v_pd, mir_tag, .ydst0, .ysrc0, .ysrc1, .none }, + .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ .required_features = .{ .avx, null }, @@ -2552,7 +2555,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ - .{ .vp_, mir_tag, .xdst0, .xsrc0, .xsrc1, .none }, + .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ .required_features = .{ .sse2, null }, @@ -2561,9 +2564,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_xmm, .xmm } }, }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ 
.once = &.{ - .{ .p_, mir_tag, .xdst0, .xsrc1, .none, .none }, + .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ }, } }, }, .{ .required_features = .{ .sse, null }, @@ -2572,9 +2575,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_xmm, .xmm } }, }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ - .{ ._ps, mir_tag, .xdst0, .xsrc1, .none, .none }, + .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ }, } }, }, .{ .required_features = .{ .mmx, null }, @@ -2583,12 +2586,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_mm, .mm } }, }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ - .{ .p_, mir_tag, .rdst0, .rsrc1, .none, .none }, + .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm8 } }, .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2601,12 +2604,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mut_gpr, .gpr } }, }, .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ - .{ ._, mir_tag, .dst0b, .src1b, .none, .none }, + .{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm16 } }, .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2619,12 +2622,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mut_gpr, .gpr } }, }, .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ - .{ ._, mir_tag, .dst0w, .src1w, .none, .none }, + .{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm32 } }, .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2637,13 +2640,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mut_gpr, .gpr } }, }, .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ - .{ ._, mir_tag, .edst0, .esrc1, .none, .none }, + .{ ._, ._, mir_tag, .dst0d, .src1d, ._, ._ }, } }, }, .{ .required_features = .{ .@"64bit", null }, - .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .simm32 } }, .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2656,9 +2659,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mut_gpr, .gpr } }, }, .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ - .{ ._, mir_tag, .rdst0, .rsrc1, .none, .none }, + .{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ }, } }, }, .{ .required_features = .{ .avx2, null }, @@ -2669,14 +2672,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, + .unused, + .unused, + .unused, }, .dst_temps = .{.mem}, .each = .{ .limb = .{ - .of = .ysrc0, + .of = .src0y, .body = &.{ - .{ .v_, .movdqu, .ytmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .vp_, mir_tag, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none }, - .{ .v_, .movdqu, .{ .dst_limb = 0 }, .ytmp1, .none, .none }, + .{ ._, .v_dqu, .mov, .tmp1y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .limb(.src1y), ._ }, + .{ ._, .v_dqu, .mov, .limb(.dst0y), .tmp1y, ._, ._ }, }, } }, }, .{ @@ -2688,14 +2694,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, + .unused, + .unused, + .unused, }, .dst_temps = .{.mem}, .each = .{ .limb = .{ - .of = .ysrc0, + .of = .src0y, .body = &.{ - .{ .v_pd, .movu, .ytmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .v_pd, mir_tag, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none }, - .{ .v_pd, .movu, .{ .dst_limb = 0 }, .ytmp1, .none, .none }, + .{ ._, .v_pd, .movu, .tmp1y, .limb(.src0y), ._, ._ }, + .{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .limb(.src1y), ._ }, + .{ ._, .v_pd, .movu, .limb(.dst0y), .tmp1y, ._, ._ }, }, } }, }, .{ @@ -2707,14 +2716,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, + .unused, + .unused, + .unused, }, .dst_temps = .{.mem}, .each = .{ .limb = .{ - .of = .xsrc0, + .of = .src0x, .body = &.{ - .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .vp_, mir_tag, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none }, - .{ .v_, .movdqu, .{ .dst_limb = 0 }, .xtmp1, .none, .none }, + .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .limb(.src1x), ._ }, + .{ ._, .v_dqu, .mov, .limb(.dst0x), .tmp1x, ._, ._ }, }, } }, }, .{ @@ -2726,14 +2738,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, + .unused, + .unused, + .unused, }, .dst_temps = .{.mem}, .each = .{ .limb = .{ - .of = .xsrc0, + .of = .src0x, .body = &.{ - .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .p_, mir_tag, .xtmp1, .{ .src_limb = 1 }, .none, .none }, - .{ ._, .movdqu, .{ .dst_limb = 0 }, .xtmp1, .none, .none }, + .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, + .{ ._, .p_, mir_tag, .tmp1x, .limb(.src1x), ._, ._ }, + .{ ._, ._dqu, .mov, .limb(.dst0x), .tmp1x, ._, ._ }, }, } }, }, .{ @@ -2745,14 +2760,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, + .unused, + .unused, + .unused, }, .dst_temps = .{.mem}, .each = .{ .limb = .{ - .of = .xsrc0, + .of = .src0x, .body = &.{ - .{ ._ps, .movu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ ._ps, mir_tag, .xtmp1, .{ .src_limb = 1 }, .none, .none }, - .{ ._ps, .movu, .{ .dst_limb = 0 }, .xtmp1, .none, .none }, + .{ ._, ._ps, .movu, .tmp1x, .limb(.src0x), ._, ._ }, + .{ ._, ._ps, mir_tag, .tmp1x, .limb(.src1x), ._, ._ }, + .{ ._, ._ps, .movu, .limb(.dst0x), .tmp1x, ._, ._ }, }, } }, }, .{ @@ -2764,33 +2782,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ 
.rc = .mmx } }, .unused, - }, - .dst_temps = .{.mem}, - .each = .{ .limb = .{ - .of = .rsrc0, - .body = &.{ - .{ ._q, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .p_, mir_tag, .rtmp1, .{ .src_limb = 1 }, .none, .none }, - .{ ._q, .mov, .{ .dst_limb = 0 }, .rtmp1, .none, .none }, - }, - } }, - }, .{ - .required_features = .{ .@"64bit", null }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, .unused, }, .dst_temps = .{.mem}, .each = .{ .limb = .{ - .of = .rsrc0, + .of = .src0q, .body = &.{ - .{ ._, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ ._, mir_tag, .rtmp1, .{ .src_limb = 1 }, .none, .none }, - .{ ._, .mov, .{ .dst_limb = 0 }, .rtmp1, .none, .none }, + .{ ._, ._q, .mov, .tmp1q, .limb(.src0q), ._, ._ }, + .{ ._, .p_, mir_tag, .tmp1q, .limb(.src1q), ._, ._ }, + .{ ._, ._q, .mov, .limb(.dst0q), .tmp1q, ._, ._ }, }, } }, }, .{ @@ -2799,21 +2801,24 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, .unused, }, .dst_temps = .{.mem}, .each = .{ .limb = .{ - .of = .esrc0, + .of = .src0p, .body = &.{ - .{ ._, .mov, .etmp1, .{ .src_limb = 0 }, .none, .none }, - .{ ._, mir_tag, .etmp1, .{ .src_limb = 1 }, .none, .none }, - .{ ._, .mov, .{ .dst_limb = 0 }, .etmp1, .none, .none }, + .{ ._, ._, .mov, .tmp1p, .limb(.src0p), ._, ._ }, + .{ ._, ._, mir_tag, .tmp1p, .limb(.src1p), ._, ._ }, + .{ ._, ._, .mov, .limb(.dst0p), .tmp1p, ._, ._ }, }, } }, } }, }) catch |err2| switch (err2) { - error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{ + error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), ops[0].tracking(cg), @@ -2875,7 +2880,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_never_tail => try cg.airCall(inst, .never_tail), .call_never_inline => try cg.airCall(inst, .never_inline), - .cmp_vector, .cmp_vector_optimized => if (use_old) try cg.airCmpVector(inst) else fallback: { + .cmp_vector, .cmp_vector_optimized => |air_tag| if (use_old) try cg.airCmpVector(inst) else fallback: { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data; switch (extra.compareOperator()) { @@ -2887,7 +2892,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { switch (extra.compareOperator()) { .lt => unreachable, .lte => unreachable, - .eq, .neq => |cmp_op| cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (cmp_op) { + .eq, .neq => |cmp_op| cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (cmp_op) { else => unreachable, .eq => .e, .neq => .ne, @@ -2895,7 +2900,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { else => unreachable, inline .e, .ne => |cc| comptime &.{ .{ .required_features = .{ .avx2, null }, - .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .ymm, .mem } }, .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, @@ -2911,11 +2916,11 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .scalar = .byte, } } }}, .each = .{ .once = &.{ - .{ .vp_b, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none }, + .{ ._, .vp_b, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ .required_features = .{ .avx2, null }, - .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .ymm, .mem } }, .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, @@ -2931,11 +2936,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .word, } } }}, .each = .{ .once = &.{ - .{ .vp_w, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none }, + .{ ._, .vp_w, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ .required_features = .{ .avx2, null }, - .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .ymm, .mem } }, .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, @@ -2951,11 +2956,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .dword, } } }}, .each = .{ .once = &.{ - .{ .vp_d, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none }, + .{ ._, .vp_d, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ .required_features = .{ .avx2, null }, - .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .ymm, .mem } }, .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, @@ -2971,11 +2976,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .qword, } } }}, .each = .{ .once = &.{ - .{ .vp_q, .cmpeq, .ydst0, .ysrc0, .ysrc1, .none }, + .{ ._, .vp_q, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ .required_features = .{ .avx, null }, - .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .xmm, .mem } }, .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, @@ -2991,11 +2996,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .byte, } } }}, .each = .{ .once = &.{ - .{ .vp_b, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none }, + .{ ._, .vp_b, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ .required_features = .{ .avx, null }, - .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .xmm, .mem } }, .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, @@ -3011,11 +3016,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .word, } } }}, .each = .{ .once = &.{ - .{ .vp_w, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none }, + .{ ._, .vp_w, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ .required_features = .{ .avx, null }, - .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .xmm, .mem } }, .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, @@ -3031,11 +3036,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .dword, } } }}, .each = .{ .once = &.{ - .{ .vp_d, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none }, + .{ ._, .vp_d, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ .required_features = .{ .avx, null }, - .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .xmm, .mem } }, 
.{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, @@ -3051,17 +3056,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .qword, } } }}, .each = .{ .once = &.{ - .{ .vp_q, .cmpeq, .xdst0, .xsrc0, .xsrc1, .none }, + .{ ._, .vp_q, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ .required_features = .{ .sse2, null }, - .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mut_xmm, .mem } }, .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_xmm, .xmm } }, }, - .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, .inverted = switch (cc) { else => unreachable, @@ -3071,17 +3076,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .byte, } } }}, .each = .{ .once = &.{ - .{ .p_b, .cmpeq, .xdst0, .xsrc1, .none, .none }, + .{ ._, .p_b, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ .required_features = .{ .sse2, null }, - .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .mut_xmm, .mem } }, .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_xmm, .xmm } }, }, - .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, .inverted = switch (cc) { else => unreachable, @@ -3091,17 +3096,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .word, } } }}, .each = .{ .once = &.{ - .{ .p_w, .cmpeq, .xdst0, .xsrc1, .none, .none }, + .{ ._, .p_w, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ .required_features = .{ .sse2, null }, - .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .mut_xmm, .mem } }, .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_xmm, .xmm } }, }, - .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, .inverted = switch (cc) { else => unreachable, @@ -3111,17 +3116,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .dword, } } }}, .each = .{ .once = &.{ - .{ .p_d, .cmpeq, .xdst0, .xsrc1, .none, .none }, + .{ ._, .p_d, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ .required_features = .{ .sse4_1, null }, - .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .mut_xmm, .mem } }, .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_xmm, .xmm } }, }, - .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, .inverted = switch (cc) { else => unreachable, @@ -3131,17 +3136,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .qword, } } }}, .each = .{ .once = &.{ - .{ .p_q, .cmpeq, .xdst0, .xsrc1, .none, .none }, + .{ ._, .p_q, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ .required_features = .{ .mmx, null }, - .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mut_mm, .mem } }, .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, .{ .src = .{ 
.mut_mm, .mm } }, }, - .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, .inverted = switch (cc) { else => unreachable, @@ -3151,17 +3156,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .byte, } } }}, .each = .{ .once = &.{ - .{ .p_b, .cmpeq, .rdst0, .rsrc1, .none, .none }, + .{ ._, .p_b, .cmpeq, .dst0q, .src1q, ._, ._ }, } }, }, .{ .required_features = .{ .mmx, null }, - .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .mut_mm, .mem } }, .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_mm, .mm } }, }, - .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, .inverted = switch (cc) { else => unreachable, @@ -3171,17 +3176,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .word, } } }}, .each = .{ .once = &.{ - .{ .p_w, .cmpeq, .rdst0, .rsrc1, .none, .none }, + .{ ._, .p_w, .cmpeq, .dst0q, .src1q, ._, ._ }, } }, }, .{ .required_features = .{ .mmx, null }, - .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .mut_mm, .mem } }, .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, .{ .src = .{ .mut_mm, .mm } }, }, - .dst_temps = .{.{ .src_mask = .{ .src = 0, .info = .{ + .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, .inverted = switch (cc) { else => unreachable, @@ -3191,10 +3196,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scalar = .dword, } } }}, .each = .{ .once = &.{ - .{ .p_d, .cmpeq, .rdst0, .rsrc1, .none, .none }, + .{ ._, .p_d, .cmpeq, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .bool_vec = .byte }, .{ .bool_vec = .byte } }, + .src_constraints = .{ .{ .bool_vec = .byte }, .{ .bool_vec = .byte } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm8 } }, .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, @@ -3207,19 +3212,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mut_gpr, .gpr } }, }, .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, .xor, .dst0b, .src1b, .none, .none }, - .{ ._, .not, .dst0b, .none, .none, .none }, + .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ }, + .{ ._, ._, .not, .dst0b, ._, ._, ._ }, }, .ne => &.{ - .{ ._, .xor, .dst0b, .src1b, .none, .none }, + .{ ._, ._, .xor, .dst0b, .src1b, ._, ._ }, }, } }, }, .{ - .constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } }, + .src_constraints = .{ .{ .bool_vec = .word }, .{ .bool_vec = .word } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm16 } }, .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, @@ -3232,19 +3237,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mut_gpr, .gpr } }, }, .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, .xor, .dst0w, .src1w, .none, .none }, - .{ ._, .not, .dst0w, .none, .none, .none }, + .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ }, + .{ ._, ._, .not, .dst0w, ._, ._, ._ }, }, .ne => &.{ - .{ ._, .xor, .dst0w, .src1w, .none, 
.none }, + .{ ._, ._, .xor, .dst0w, .src1w, ._, ._ }, }, } }, }, .{ - .constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } }, + .src_constraints = .{ .{ .bool_vec = .dword }, .{ .bool_vec = .dword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm32 } }, .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, @@ -3257,20 +3262,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mut_gpr, .gpr } }, }, .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, .xor, .edst0, .esrc1, .none, .none }, - .{ ._, .not, .edst0, .none, .none, .none }, + .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ }, + .{ ._, ._, .not, .dst0d, ._, ._, ._ }, }, .ne => &.{ - .{ ._, .xor, .edst0, .esrc1, .none, .none }, + .{ ._, ._, .xor, .dst0d, .src1d, ._, ._ }, }, } }, }, .{ .required_features = .{ .@"64bit", null }, - .constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } }, + .src_constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .simm32 } }, .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, @@ -3283,262 +3288,1601 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .mut_gpr, .gpr } }, }, .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .src = 0 }}, + .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, .xor, .rdst0, .rsrc1, .none, .none }, - .{ ._, .not, .rdst0, .none, .none, .none }, + .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ }, + .{ ._, ._, .not, .dst0q, ._, ._, ._ }, }, .ne => &.{ - .{ ._, .xor, .rdst0, .rsrc1, .none, .none }, + .{ ._, ._, .xor, .dst0q, .src1q, ._, ._ }, }, } }, + }, .{ + .src_constraints = .{ .any_bool_vec, .any_bool_vec }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .clobbers = .{ .eflags = true }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .not, .tmp1p, ._, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ 
.limb_and_mask_limb = .{ + .of = .src0y, + .of_mask = .dst0b, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0d), .tmp2d, ._, ._ }, + }, + .ne => &.{ + .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0d), .tmp2d, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0y, + .of_mask = .dst0w, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, + }, + .ne => &.{ + .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .not, .tmp2w, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0y, + .of_mask = .dst0d, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, + .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0b), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, + .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0b), .tmp2b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .avx2, null }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .reg = .rcx } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0y, + .of_mask = .dst0q, + 
.body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .v_dqu, .mov, .tmp4y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .limb(.src1y), ._ }, + .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + .ne => &.{ + .{ ._, .v_dqu, .mov, .tmp4y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .limb(.src1y), ._ }, + .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0x, + .of_mask = .dst0b, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, + }, + .ne => &.{ + .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .not, .tmp2w, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0x, + .of_mask = .dst0w, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .reg = .rcx } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0x, + .of_mask = 
.dst0d, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ }, + .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + .ne => &.{ + .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ }, + .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .avx, null }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .reg = .rcx } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0x, + .of_mask = .dst0q, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ }, + .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + .ne => &.{ + .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ }, + .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0x, + .of_mask = .dst0b, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .not, .tmp2w, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0x, + .of_mask = .dst0w, + .body = switch (cc) { + 
else => unreachable, + .e => &.{ + .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, + .{ ._, .p_w, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, + .{ ._, .p_w, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .sse2, null }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .reg = .rcx } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0x, + .of_mask = .dst0d, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, + .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ }, + .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, + .{ ._, .p_d, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ }, + .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .sse4_1, null }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .reg = .rcx } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0x, + .of_mask = .dst0q, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, + .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ }, + .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, + .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ }, + .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .mmx, null }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ 
.limb_and_mask_limb = .{ + .of = .src0q, + .of_mask = .dst0b, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, + .{ ._, ._, .not, .tmp2w, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .mmx, null }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0q, + .of_mask = .dst0w, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ }, + .{ ._, .p_w, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ }, + .{ ._, .p_w, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, + .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, + .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .mmx, null }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .reg = .rcx } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .limb_and_mask_limb = .{ + .of = .src0q, + .of_mask = .dst0d, + .body = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._dqu, .mov, .tmp4q, .limb(.src0q), ._, ._ }, + .{ ._, .p_q, .cmpeq, .tmp4q, .limb(.src1q), ._, ._ }, + .{ ._, ._ps, .movmsk, .tmp3d, .tmp4q, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._dqu, .mov, .tmp4q, .limb(.src0q), ._, ._ }, + .{ ._, .p_q, .cmpeq, .tmp4q, .limb(.src1q), ._, ._ }, + .{ ._, ._ps, .movmsk, .tmp3d, .tmp4q, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + }, + }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .dst_constraints = .{.{ .bool_vec = .byte }}, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ 
.once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .byte }}, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .dst_constraints = .{.{ .bool_vec = .byte }}, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .byte }}, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, 
._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .dst_constraints = .{.{ .bool_vec = .byte }}, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .byte }}, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec }, + .dst_constraints = .{.{ .bool_vec = .byte }}, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + 
.dst_constraints = .{.{ .bool_vec = .byte }}, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .dst_constraints = .{.{ .bool_vec = .byte }}, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, + .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, + .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .byte }}, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, + .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, + .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, + .{ ._, ._nz, .j, 
.@"1b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .dst_constraints = .{.{ .bool_vec = .dword }}, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .dword }}, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .dst_constraints = .{.{ .bool_vec = .dword }}, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, 
.tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .dword }}, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .dst_constraints = .{.{ .bool_vec = .dword }}, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .dword }}, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, + .{ ._, 
.fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec }, + .dst_constraints = .{.{ .bool_vec = .dword }}, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .dst_constraints = .{.{ .bool_vec = .dword }}, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .dst_constraints = .{.{ .bool_vec = .dword }}, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, + .{ ._, ._, .xor, 
.tmp4p, .memi(.src1p, .tmp0), ._, ._ }, + .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .dword }}, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, + .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, + .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = 
.{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = 
.general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ 
.type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, + .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, + .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null }, + .dst_constraints = .{.{ .bool_vec = .qword }}, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, + .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, + .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"1b", 
._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, + .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .reg = .ecx } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, + .{ ._, ._, .add, .tmp1d, .i(1), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_access_size), .tmp2p, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .reg = .ecx } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, + .{ ._, ._, .inc, .tmp1d, ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_access_size), .tmp2p, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .inc, .tmp0p, 
._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ }, + } }, } }, }) catch |err2| switch (err2) { - error.Select2Failed => cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, &.{ - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .byte }, - .loop = .elementwise, - .mir_tag = .{ .vp_b, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, - }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .word }, - .loop = .elementwise, - .mir_tag = .{ .vp_w, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, - }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .dword }, - .loop = .elementwise, - .mir_tag = .{ .vp_d, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, - }, - }, - .{ - .required_features = &.{.avx2}, - .scalar = .{ .any_int = .qword }, - .loop = .elementwise, - .mir_tag = .{ .vp_q, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .ymm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .mem_limb } }, - .{ .ops = &.{ .ymm_mask_limb, .ymm_limb, .ymm_limb } }, - }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .byte }, - .loop = .elementwise, - .mir_tag = .{ .vp_b, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .word }, - .loop = .elementwise, - .mir_tag = .{ .vp_w, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .dword }, - .loop = .elementwise, - .mir_tag = .{ .vp_d, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.avx}, - .scalar = .{ .any_int = .qword }, - .loop = .elementwise, - 
.mir_tag = .{ .vp_q, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .explicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .explicit = 0 } }, .commute = .{ 1, 2 } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .xmm_limb, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .byte }, - .loop = .elementwise, - .mir_tag = .{ .p_b, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .word }, - .loop = .elementwise, - .mir_tag = .{ .p_w, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse2}, - .scalar = .{ .any_int = .dword }, - .loop = .elementwise, - .mir_tag = .{ .p_d, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.sse4_1}, - .scalar = .{ .any_int = .qword }, - .loop = .elementwise, - .mir_tag = .{ .p_q, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .xmm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .xmm_mask_limb, .{ .implicit = 0 }, .xmm_limb } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .byte }, - .loop = .elementwise, - .mir_tag = .{ .p_b, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .word }, - .loop = .elementwise, - .mir_tag = .{ .p_w, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, - }, - }, - .{ - .required_features = &.{.mmx}, - .scalar = .{ .any_int = .dword }, - .loop = .elementwise, - .mir_tag = .{ .p_d, .cmpeq }, - .patterns = &.{ - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .mm_mask_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .mm_mask_limb, .{ .implicit = 0 }, .mm_limb } }, - }, - }, - .{ - .scalar = .bool, - .clobbers = .{ .eflags = true }, - .invert_result = true, - .loop = .elementwise, - .mir_tag = .{ ._, .xor }, - .patterns = &.{ - .{ .ops = &.{ .mem_limb, .{ .implicit = 0 }, .gpr_limb } }, - .{ .ops = &.{ .mem_limb, .gpr_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .mem_limb } }, - .{ .ops = &.{ .gpr_limb, .mem_limb, .{ .implicit = 0 } } }, - .{ .ops = &.{ .gpr_limb, .{ .implicit = 0 }, .gpr_limb } }, - }, - }, - .{ - .scalar = .{ .any_int = .byte }, - .clobbers = .{ .eflags = true }, - .loop = .elementwise, - .mir_tag = .{ ._, .cmp }, - .patterns = &.{ - .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } }, - .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } 
}, - .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } }, - }, - }, - .{ - .scalar = .{ .any_int = .word }, - .clobbers = .{ .eflags = true }, - .loop = .elementwise, - .mir_tag = .{ ._, .cmp }, - .patterns = &.{ - .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } }, - .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } }, - .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } }, - }, - }, - .{ - .scalar = .{ .any_int = .dword }, - .clobbers = .{ .eflags = true }, - .loop = .elementwise, - .mir_tag = .{ ._, .cmp }, - .patterns = &.{ - .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } }, - .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } }, - .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } }, - }, - }, - .{ - .scalar = .{ .any_int = .qword }, - .clobbers = .{ .eflags = true }, - .loop = .elementwise, - .mir_tag = .{ ._, .cmp }, - .patterns = &.{ - .{ .ops = &.{ .cc_elem, .mem_elem, .gpr_elem } }, - .{ .ops = &.{ .cc_elem, .gpr_elem, .mem_elem } }, - .{ .ops = &.{ .cc_elem, .gpr_elem, .gpr_elem } }, - }, - }, - }, .{ - .cc = .e, - .invert_result = switch (cmp_op) { - .eq => false, - .neq => true, - else => unreachable, - }, - }) catch |err| switch (err) { - error.SelectFailed => return cg.fail("failed to select", .{}), - else => |e| return e, - }, + error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ + @tagName(air_tag), + cg.typeOf(extra.lhs).fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), else => |e| return e, }, .gte => unreachable, @@ -3572,7 +4916,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unsigned; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (signedness) { + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (signedness) { .signed => switch (air_tag) { else => unreachable, .cmp_lt, .cmp_lt_optimized => .l, @@ -3590,7 +4934,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .l, .le, .ge, .g, .b, .be, .ae, .a => |cc| comptime &.{ .{ - .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } }, @@ -3599,10 +4943,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, .each = .{ .once = &.{ - .{ ._, .cmp, .src0b, .src1b, .none, .none }, + .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mem, .imm8 } }, .{ .src = .{ .gpr, .imm8 } }, @@ -3612,10 +4956,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ ._, .cmp, .src0b, .src1b, .none, .none }, + .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } }, @@ -3624,10 +4968,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps 
= .{.{ .cc = cc.commute() }}, .each = .{ .once = &.{ - .{ ._, .cmp, .src0w, .src1w, .none, .none }, + .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .mem, .imm16 } }, .{ .src = .{ .gpr, .imm16 } }, @@ -3637,10 +4981,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ ._, .cmp, .src0w, .src1w, .none, .none }, + .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } }, @@ -3649,10 +4993,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, .each = .{ .once = &.{ - .{ ._, .cmp, .esrc0, .esrc1, .none, .none }, + .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .mem, .imm32 } }, .{ .src = .{ .gpr, .imm32 } }, @@ -3662,11 +5006,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ ._, .cmp, .esrc0, .esrc1, .none, .none }, + .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, }, .{ .required_features = .{ .@"64bit", null }, - .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } }, @@ -3675,11 +5019,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, .each = .{ .once = &.{ - .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none }, + .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, }, .{ .required_features = .{ .@"64bit", null }, - .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .mem, .simm32 } }, .{ .src = .{ .gpr, .simm32 } }, @@ -3689,60 +5033,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none }, - } }, - }, .{ - .required_features = .{ .@"64bit", null }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .bool, .kind = .{ .rc = .general_purpose } }, - .unused, - }, - .clobbers = .{ .eflags = true }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .limb = .{ - .of = .rsrc0, - .header = &.{ - .{ ._, .xor, .tmp1b, .tmp1b, .none, .none }, - }, - .body = &.{ - .{ ._, .mov, .rtmp0, .{ .src_limb = 0 }, .none, .none }, - .{ ._r, .sh, .tmp1b, .{ .simm32 = 1 }, .none, .none }, - .{ ._, .sbb, .rtmp0, .{ .src_limb = 1 }, .none, .none }, - .{ ._c, .set, .tmp1b, .none, .none, .none }, - .{ .fromCondition(cc), .set, .dst0b, .none, .none, .none }, - }, + .{ ._, ._, .cmp, .src0q, 
.src1q, ._, ._ }, } }, }, .{ .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .bool, .kind = .{ .rc = .general_purpose } }, .unused, + .unused, + .unused, + .unused, }, .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .rc = .general_purpose }}, .each = .{ .limb = .{ - .of = .esrc0, + .of = .src0p, .header = &.{ - .{ ._, .xor, .tmp1b, .tmp1b, .none, .none }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, }, .body = &.{ - .{ ._, .mov, .etmp0, .{ .src_limb = 0 }, .none, .none }, - .{ ._r, .sh, .tmp1b, .{ .simm32 = 1 }, .none, .none }, - .{ ._, .sbb, .etmp0, .{ .src_limb = 1 }, .none, .none }, - .{ ._c, .set, .tmp1b, .none, .none, .none }, - .{ .fromCondition(cc), .set, .dst0b, .none, .none, .none }, + .{ ._, ._, .mov, .tmp0p, .limb(.src0p), ._, ._ }, + .{ ._, ._r, .sh, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp0p, .limb(.src1p), ._, ._ }, + .{ ._, ._c, .set, .tmp1b, ._, ._, ._ }, + .{ ._, .fromCondition(cc), .set, .dst0b, ._, ._, ._ }, }, } }, } }, }) catch |err| switch (err) { - error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{ + error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), ops[0].tracking(cg), @@ -3767,7 +5089,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - cg.select2(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (air_tag) { + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (air_tag) { else => unreachable, .cmp_eq, .cmp_eq_optimized => .e, .cmp_neq, .cmp_neq_optimized => .ne, @@ -3775,52 +5097,73 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { else => unreachable, inline .e, .ne => |cc| comptime &.{ .{ .required_features = .{ .avx2, null }, - .constraints = .{ .any_int, .any_int }, + .src_constraints = .{ .any_int, .any_int }, .patterns = &.{ .{ .src = .{ .ymm, .mem } }, .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, .{ .src = .{ .ymm, .ymm } }, }, .clobbers = .{ .eflags = true }, - .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused }, + .extra_temps = .{ + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ .vp_, .xor, .ytmp0, .ysrc0, .ysrc1, .none }, - .{ .vp_, .@"test", .ytmp0, .ytmp0, .none, .none }, + .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ }, + .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, } }, }, .{ .required_features = .{ .avx, null }, - .constraints = .{ .any_int, .any_int }, + .src_constraints = .{ .any_int, .any_int }, .patterns = &.{ .{ .src = .{ .ymm, .mem } }, .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, .{ .src = .{ .ymm, .ymm } }, }, .clobbers = .{ .eflags = true }, - .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused }, + .extra_temps = .{ + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ .v_pd, .xor, .ytmp0, .ysrc0, .ysrc1, .none }, - .{ .vp_, .@"test", .ytmp0, .ytmp0, .none, .none }, + .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ }, + .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, } }, }, .{ .required_features = .{ .avx, null }, - .constraints = .{ .any_int, 
.any_int }, + .src_constraints = .{ .any_int, .any_int }, .patterns = &.{ .{ .src = .{ .xmm, .mem } }, .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, .{ .src = .{ .xmm, .xmm } }, }, .clobbers = .{ .eflags = true }, - .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, .unused }, + .extra_temps = .{ + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ .vp_, .xor, .xtmp0, .xsrc0, .xsrc1, .none }, - .{ .vp_, .@"test", .xtmp0, .xtmp0, .none, .none }, + .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ }, + .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ }, } }, }, .{ .required_features = .{ .sse4_1, null }, - .constraints = .{ .any_int, .any_int }, + .src_constraints = .{ .any_int, .any_int }, .patterns = &.{ .{ .src = .{ .mut_xmm, .mem } }, .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, @@ -3829,12 +5172,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ .p_, .xor, .xsrc0, .xsrc1, .none, .none }, - .{ .p_, .@"test", .xsrc0, .xsrc0, .none, .none }, + .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, + .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ }, } }, }, .{ .required_features = .{ .sse2, null }, - .constraints = .{ .any_int, .any_int }, + .src_constraints = .{ .any_int, .any_int }, .patterns = &.{ .{ .src = .{ .mut_xmm, .mem } }, .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, @@ -3845,18 +5188,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ .p_, .xor, .xtmp1, .xtmp1, .none, .none }, - .{ .p_, .xor, .xsrc0, .xsrc1, .none, .none }, - .{ .p_b, .cmpeq, .xtmp1, .xsrc0, .none, .none }, - .{ .p_b, .movmsk, .etmp0, .xtmp1, .none, .none }, - .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u16) }, .none, .none }, + .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .i(0xffff), ._, ._ }, } }, }, .{ .required_features = .{ .sse2, .mmx }, - .constraints = .{ .any_int, .any_int }, + .src_constraints = .{ .any_int, .any_int }, .patterns = &.{ .{ .src = .{ .mut_mm, .mem } }, .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, @@ -3867,17 +5213,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .mmx } }, .unused, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ .p_, .xor, .rtmp1, .rtmp1, .none, .none }, - .{ .p_, .xor, .rsrc0, .rsrc1, .none, .none }, - .{ .p_b, .cmpeq, .rtmp1, .rsrc0, .none, .none }, - .{ .p_b, .movmsk, .etmp0, .rtmp1, .none, .none }, - .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u8) }, .none, .none }, + .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .i(0xff), ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mem, .imm8 } }, .{ .src = .{ 
.imm8, .mem }, .commute = .{ 0, 1 } }, @@ -3890,10 +5239,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ ._, .cmp, .src0b, .src1b, .none, .none }, + .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .mem, .imm16 } }, .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, @@ -3906,10 +5255,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ ._, .cmp, .src0w, .src1w, .none, .none }, + .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, }, .{ - .constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .mem, .imm32 } }, .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, @@ -3922,11 +5271,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ ._, .cmp, .esrc0, .esrc1, .none, .none }, + .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, }, .{ .required_features = .{ .@"64bit", null }, - .constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .mem, .simm32 } }, .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, @@ -3939,7 +5288,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .once = &.{ - .{ ._, .cmp, .rsrc0, .rsrc1, .none, .none }, + .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, }, .{ .required_features = .{ .avx2, null }, @@ -3950,20 +5299,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .limb = .{ - .of = .ysrc0, + .of = .src0y, .header = &.{ - .{ .vp_, .xor, .ytmp2, .ytmp2, .ytmp2, .none }, + .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .tmp2y, ._ }, }, .body = &.{ - .{ .v_, .movdqu, .ytmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .vp_, .xor, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none }, - .{ .vp_, .@"or", .ytmp2, .ytmp2, .ytmp1, .none }, + .{ ._, .v_dqu, .mov, .tmp1y, .limb(.src0y), ._, ._ }, + .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .limb(.src1y), ._ }, + .{ ._, .vp_, .@"or", .tmp2y, .tmp2y, .tmp1y, ._ }, }, .trailer = &.{ - .{ .vp_, .@"test", .ytmp2, .ytmp2, .none, .none }, + .{ ._, .vp_, .@"test", .tmp2y, .tmp2y, ._, ._ }, }, } }, }, .{ @@ -3975,20 +5327,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .limb = .{ - .of = .ysrc0, + .of = .src0y, .header = &.{ - .{ .v_pd, .xor, .ytmp2, .ytmp2, .ytmp2, .none }, + .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .tmp2y, ._ }, }, .body = &.{ - .{ .v_pd, .movu, .ytmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .v_pd, .xor, .ytmp1, .ytmp1, .{ .src_limb = 1 }, .none }, - .{ .v_pd, .@"or", .ytmp2, .ytmp2, .ytmp1, .none }, + .{ ._, 
.v_pd, .movu, .tmp1y, .limb(.src0y), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .limb(.src1y), ._ }, + .{ ._, .v_pd, .@"or", .tmp2y, .tmp2y, .tmp1y, ._ }, }, .trailer = &.{ - .{ .vp_, .@"test", .ytmp2, .ytmp2, .none, .none }, + .{ ._, .vp_, .@"test", .tmp2y, .tmp2y, ._, ._ }, }, } }, }, .{ @@ -4000,20 +5355,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .limb = .{ - .of = .xsrc0, + .of = .src0x, .header = &.{ - .{ .vp_, .xor, .xtmp2, .xtmp2, .xtmp2, .none }, + .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ }, }, .body = &.{ - .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .vp_, .xor, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none }, - .{ .vp_, .@"or", .xtmp2, .xtmp2, .xtmp1, .none }, + .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .limb(.src1x), ._ }, + .{ ._, .vp_, .@"or", .tmp2x, .tmp2x, .tmp1x, ._ }, }, .trailer = &.{ - .{ .vp_, .@"test", .xtmp2, .xtmp2, .none, .none }, + .{ ._, .vp_, .@"test", .tmp2x, .tmp2x, ._, ._ }, }, } }, }, .{ @@ -4025,20 +5383,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .limb = .{ - .of = .xsrc0, + .of = .src0x, .header = &.{ - .{ .vp_, .xor, .xtmp2, .xtmp2, .xtmp2, .none }, + .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ }, }, .body = &.{ - .{ .v_, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .vp_, .xor, .xtmp1, .xtmp1, .{ .src_limb = 1 }, .none }, - .{ .vp_, .@"or", .xtmp2, .xtmp2, .xtmp1, .none }, + .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, + .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .limb(.src1x), ._ }, + .{ ._, .vp_, .@"or", .tmp2x, .tmp2x, .tmp1x, ._ }, }, .trailer = &.{ - .{ .vp_, .@"test", .xtmp2, .xtmp2, .none, .none }, + .{ ._, .vp_, .@"test", .tmp2x, .tmp2x, ._, ._ }, }, } }, }, .{ @@ -4050,20 +5411,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .limb = .{ - .of = .xsrc0, + .of = .src0x, .header = &.{ - .{ .p_, .xor, .xtmp2, .xtmp2, .none, .none }, + .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, }, .body = &.{ - .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .p_, .xor, .xtmp1, .{ .src_limb = 1 }, .none, .none }, - .{ .p_, .@"or", .xtmp2, .xtmp1, .none, .none }, + .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, + .{ ._, .p_, .xor, .tmp1x, .limb(.src1x), ._, ._ }, + .{ ._, .p_, .@"or", .tmp2x, .tmp1x, ._, ._ }, }, .trailer = &.{ - .{ .p_, .@"test", .xtmp2, .xtmp2, .none, .none }, + .{ ._, .p_, .@"test", .tmp2x, .tmp2x, ._, ._ }, }, } }, }, .{ @@ -4075,23 +5439,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .limb = .{ - .of = .xsrc0, + .of = .src0x, .header = &.{ - .{ .p_, .xor, .xtmp2, .xtmp2, .none, .none }, + .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, 
}, .body = &.{ - .{ ._, .movdqu, .xtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .p_, .xor, .xtmp1, .{ .src_limb = 1 }, .none, .none }, - .{ .p_, .@"or", .xtmp2, .xtmp1, .none, .none }, + .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, + .{ ._, .p_, .xor, .tmp1x, .limb(.src1x), ._, ._ }, + .{ ._, .p_, .@"or", .tmp2x, .tmp1x, ._, ._ }, }, .trailer = &.{ - .{ .p_, .xor, .xtmp1, .xtmp1, .none, .none }, - .{ .p_b, .cmpeq, .xtmp2, .xtmp1, .none, .none }, - .{ .p_b, .movmsk, .etmp0, .xtmp2, .none, .none }, - .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u16) }, .none, .none }, + .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp2x, .tmp1x, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp0d, .tmp2x, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .i(0xffff), ._, ._ }, }, } }, }, .{ @@ -4103,48 +5470,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .mmx } }, .{ .kind = .{ .rc = .mmx } }, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .limb = .{ - .of = .rsrc0, + .of = .src0q, .header = &.{ - .{ .p_, .xor, .rtmp2, .rtmp2, .none, .none }, + .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ }, }, .body = &.{ - .{ ._q, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ .p_, .xor, .rtmp1, .{ .src_limb = 1 }, .none, .none }, - .{ .p_, .@"or", .rtmp2, .rtmp1, .none, .none }, + .{ ._, ._q, .mov, .tmp1q, .limb(.src0q), ._, ._ }, + .{ ._, .p_, .xor, .tmp1q, .limb(.src1q), ._, ._ }, + .{ ._, .p_, .@"or", .tmp2q, .tmp1q, ._, ._ }, }, .trailer = &.{ - .{ .p_, .xor, .rtmp1, .rtmp1, .none, .none }, - .{ .p_b, .cmpeq, .rtmp2, .rtmp1, .none, .none }, - .{ .p_b, .movmsk, .etmp0, .rtmp2, .none, .none }, - .{ ._, .xor, .etmp0, .{ .simm32 = std.math.maxInt(u8) }, .none, .none }, - }, - } }, - }, .{ - .required_features = .{ .@"64bit", null }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - }, - .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .rsrc0, - .header = &.{ - .{ ._, .xor, .rtmp2, .rtmp2, .none, .none }, - }, - .body = &.{ - .{ ._, .mov, .rtmp1, .{ .src_limb = 0 }, .none, .none }, - .{ ._, .xor, .rtmp1, .{ .src_limb = 1 }, .none, .none }, - .{ ._, .@"or", .rtmp2, .rtmp1, .none, .none }, - }, - .trailer = &.{ - .{ ._, .@"test", .rtmp2, .rtmp2, .none, .none }, + .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp0d, .tmp2q, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .i(0xff), ._, ._ }, }, } }, }, .{ @@ -4153,27 +5498,30 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, }, .dst_temps = .{.{ .cc = cc }}, .each = .{ .limb = .{ - .of = .esrc0, + .of = .src0p, .header = &.{ - .{ ._, .xor, .etmp2, .etmp2, .none, .none }, + .{ ._, ._, .xor, .tmp2p, .tmp2p, ._, ._ }, }, .body = &.{ - .{ ._, .mov, .etmp1, .{ .src_limb = 0 }, .none, .none }, - .{ ._, .xor, .etmp1, .{ .src_limb = 1 }, .none, .none }, - .{ ._, .@"or", .etmp2, .etmp1, 
.none, .none }, + .{ ._, ._, .mov, .tmp1p, .limb(.src0p), ._, ._ }, + .{ ._, ._, .xor, .tmp1p, .limb(.src1p), ._, ._ }, + .{ ._, ._, .@"or", .tmp2p, .tmp1p, ._, ._ }, }, .trailer = &.{ - .{ ._, .@"test", .etmp2, .etmp2, .none, .none }, + .{ ._, ._, .@"test", .tmp2p, .tmp2p, ._, ._ }, }, } }, } }, }) catch |err| switch (err) { - error.Select2Failed => return cg.fail("failed to select2 {s} {} {} {}", .{ + error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), ops[0].tracking(cg), @@ -16456,7 +17804,7 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { } }, } }; } else { - if (mnem_size) |size| if (reg.bitSize() != size.bitSize()) + if (mnem_size) |size| if (reg.bitSize() != size.bitSize(self.target)) return self.fail("invalid register size: '{s}'", .{op_str}); op.* = .{ .reg = reg }; } @@ -16524,14 +17872,14 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { } else if (std.mem.startsWith(u8, op_str, "$")) { if (std.fmt.parseInt(i32, op_str["$".len..], 0)) |s| { if (mnem_size) |size| { - const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize() - 1)); + const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - (size.bitSize(self.target) - 1)); if ((if (s < 0) ~s else s) > max) return self.fail("invalid immediate size: '{s}'", .{op_str}); } op.* = .{ .imm = .s(s) }; } else |_| if (std.fmt.parseInt(u64, op_str["$".len..], 0)) |u| { if (mnem_size) |size| { - const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize()); + const max = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - size.bitSize(self.target)); if (u > max) return self.fail("invalid immediate size: '{s}'", .{op_str}); } @@ -16827,10 +18175,13 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) else .{ ._q, .mov } }, 9...16 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 17...32 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, } else switch (abi_size) { 4 => return .{ .move = if (self.hasFeature(.avx)) @@ -16842,12 +18193,13 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) else .{ ._sd, .mov } }, 9...16 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } - else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } }, - 17...32 => if (self.hasFeature(.avx)) return .{ .move = if (aligned) - .{ .v_pd, .mova } + .{ .v_pd, if (aligned) .mova else .movu } + else if (self.hasFeature(.sse2)) + .{ ._pd, if (aligned) .mova else .movu } else - .{ .v_pd, .movu } }, + .{ ._ps, if (aligned) .mova else .movu } }, + 17...32 => if (self.hasFeature(.avx)) + return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } }, else => {}, } }, @@ -16868,8 +18220,11 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) else .{ ._sd, .mov } }, 128 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if 
(self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, else => {}, }, .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { @@ -16883,65 +18238,62 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) .int => switch (ty.childType(zcu).intInfo(zcu).bits) { 1...8 => switch (ty.vectorLen(zcu)) { 1...16 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 17...32 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, }, 9...16 => switch (ty.vectorLen(zcu)) { 1...8 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 9...16 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, }, 17...32 => switch (ty.vectorLen(zcu)) { 1...4 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 5...8 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, }, 33...64 => switch (ty.vectorLen(zcu)) { 1...2 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 3...4 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, }, 65...128 => switch (ty.vectorLen(zcu)) { 1 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 2 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, }, 129...256 => switch (ty.vectorLen(zcu)) { 1 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, }, else => {}, @@ -16949,13 
+18301,13 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) .pointer, .optional => if (ty.childType(zcu).isPtrAtRuntime(zcu)) switch (ty.vectorLen(zcu)) { 1...2 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 3...4 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, } else @@ -16963,46 +18315,42 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) .float => switch (ty.childType(zcu).floatBits(self.target.*)) { 16 => switch (ty.vectorLen(zcu)) { 1...8 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 9...16 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, }, 32 => switch (ty.vectorLen(zcu)) { 1...4 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + .{ .v_ps, if (aligned) .mova else .movu } + else + .{ ._ps, if (aligned) .mova else .movu } }, 5...8 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_ps, .mova } - else - .{ .v_ps, .movu } }, + return .{ .move = .{ .v_ps, if (aligned) .mova else .movu } }, else => {}, }, 64 => switch (ty.vectorLen(zcu)) { 1...2 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } - else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } }, + .{ .v_pd, if (aligned) .mova else .movu } + else + .{ ._pd, if (aligned) .mova else .movu } }, 3...4 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_pd, .mova } - else - .{ .v_pd, .movu } }, + return .{ .move = .{ .v_pd, if (aligned) .mova else .movu } }, else => {}, }, 128 => switch (ty.vectorLen(zcu)) { 1 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } - else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + .{ if (aligned) .v_dqa else .v_dqu, .mov } + else if (self.hasFeature(.sse2)) + .{ if (aligned) ._dqa else ._dqu, .mov } + else + .{ ._ps, if (aligned) .mova else .movu } }, 2 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) - .{ .v_, .movdqa } - else - .{ .v_, .movdqu } }, + return .{ .move = .{ if (aligned) .v_dqa else .v_dqu, .mov } }, else => {}, }, else => {}, @@ -17211,7 +18559,7 @@ fn genSetReg( src_reg, ), .x87, .mmx, .ip => unreachable, - .sse => try self.asmRegisterRegister( + .sse => if (self.hasFeature(.sse2)) try self.asmRegisterRegister( switch (abi_size) { 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, @@ -17219,7 +18567,20 @@ fn genSetReg( }, registerAlias(dst_reg, @max(abi_size, 4)), src_reg.to128(), - ), + ) 
else { + const frame_index = try self.allocFrameIndex(.init(.{ + .size = 4, + .alignment = .@"4", + })); + try self.asmMemoryRegister(.{ ._ss, .mov }, .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .dword } }, + }, src_reg.to128()); + try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(dst_reg, abi_size), .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .fromSize(abi_size) } }, + }); + }, }, .segment => try self.asmRegisterRegister( .{ ._, .mov }, @@ -17264,17 +18625,17 @@ fn genSetReg( .sse => try self.asmRegisterRegister( @as(?Mir.Inst.FixedTag, switch (ty.scalarType(zcu).zigTypeTag(zcu)) { else => switch (abi_size) { - 1...16 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa }, - 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null, + 1...16 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else .{ ._dqa, .mov }, + 17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null, else => null, }, .float => switch (ty.scalarType(zcu).floatBits(self.target.*)) { 16, 128 => switch (abi_size) { 2...16 => if (self.hasFeature(.avx)) - .{ .v_, .movdqa } + .{ .v_dqa, .mov } else - .{ ._, .movdqa }, - 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null, + .{ ._dqa, .mov }, + 17...32 => if (self.hasFeature(.avx)) .{ .v_dqa, .mov } else null, else => null, }, 32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova }, @@ -17346,7 +18707,7 @@ fn genSetReg( const pack_lock = self.register_manager.lockReg(pack_reg); defer if (pack_lock) |lock| self.register_manager.unlockReg(lock); - var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.info.scalar.bitSize(), 8)); + var mask_size: u32 = @intCast(ty.vectorLen(zcu) * @divExact(src_reg_mask.info.scalar.bitSize(self.target), 8)); switch (src_reg_mask.info.scalar) { else => {}, .word => { @@ -17355,7 +18716,7 @@ fn genSetReg( if (has_avx) { try self.asmRegisterRegisterRegister(.{ .vp_b, .ackssw }, pack_alias, src_alias, src_alias); } else { - try self.asmRegisterRegister(.{ ._, .movdqa }, pack_alias, src_alias); + try self.asmRegisterRegister(.{ ._dqa, .mov }, pack_alias, src_alias); try self.asmRegisterRegister(.{ .p_b, .ackssw }, pack_alias, pack_alias); } mask_size = std.math.divCeil(u32, mask_size, 2) catch unreachable; @@ -17592,7 +18953,10 @@ fn genSetMem( })).write( self, .{ .base = base, .mod = .{ .rm = .{ - .size = .fromBitSize(@min(self.memSize(ty).bitSize(), src_alias.bitSize())), + .size = .fromBitSize(@min( + self.memSize(ty).bitSize(self.target), + src_alias.bitSize(), + )), .disp = disp, } } }, src_alias, @@ -22460,36 +23824,6 @@ const Temp = struct { try cg.asmOpOnly(.{ .@"rep _sb", .mov }); } - // i, m, r - fn add(lhs: *Temp, rhs: *Temp, cg: *CodeGen) !Temp { - const res_index = cg.next_temp_index; - var res: Temp = .{ .index = res_index.toIndex() }; - try cg.select(&.{ &res, lhs, rhs }, .{ ._, .add }, &.{ - .{ .ops = &.{ .{ .match = 1 }, .r, .i } }, - .{ .ops = &.{ .{ .match = 1 }, .m, .i } }, - .{ .ops = &.{ .{ .match = 1 }, .r, .m } }, - .{ .ops = &.{ .{ .match = 1 }, .m, .r } }, - .{ .ops = &.{ .{ .match = 1 }, .r, .r } }, - }); - cg.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1); - cg.temp_type[@intFromEnum(res_index)] = lhs.typeOf(cg); - return res; - } - - fn mul(lhs: *Temp, rhs: *Temp, cg: *CodeGen) !Temp { - const res_index = cg.next_temp_index; - var res: Temp = .{ .index = cg.next_temp_index.toIndex() }; - try cg.select(&.{ &res, lhs, rhs }, .{ .i_, .mul }, &.{ - .{ .ops = &.{ .r, .m, .i } }, - 
.{ .ops = &.{ .r, .r, .i } }, - .{ .ops = &.{ .{ .match = 1 }, .r, .m } }, - .{ .ops = &.{ .{ .match = 1 }, .r, .r } }, - }); - cg.next_temp_index = @enumFromInt(@intFromEnum(res_index) + 1); - cg.temp_type[@intFromEnum(res_index)] = lhs.typeOf(cg); - return res; - } - fn moveTo(temp: Temp, inst: Air.Inst.Index, cg: *CodeGen) !void { if (cg.liveness.isUnused(inst)) try temp.die(cg) else switch (temp.unwrap(cg)) { .ref => { @@ -22722,1144 +24056,31 @@ const Operand = union(enum) { inst: Mir.Inst.Index, }; -const Pattern = struct { - ops: []const Op, - commute: struct { u8, u8 } = .{ 0, 0 }, - - const Set = struct { - required_features: []const std.Target.x86.Feature = &.{}, - scalar: union(enum) { - any, - bool, - float: Memory.Size, - any_int: Memory.Size, - signed_int: Memory.Size, - unsigned_int: Memory.Size, - any_float_or_int: Memory.Size, - } = .any, - clobbers: struct { eflags: bool = false } = .{}, - invert_result: bool = false, - loop: enum { - /// only execute the instruction once - once, - /// execute the instruction on all groups of non-overlapping bits in the entire value - bitwise, - /// for each element, execute the instruction on each limb, propagating the carry flag - limbwise_carry, - /// for each element, execute the instruction on each limb, propagating a register - limbwise_reduce, - /// for each element, execute the instruction on pairs of limbs, starting from the - /// least significant, propagating a limb - limbwise_pairs_forward, - /// for each element, execute the instruction on pairs of limbs, starting from the - /// most significant, propagating a limb - limbwise_pairs_reverse, - /// for each element, execute the instruction - elementwise, - } = .once, - mir_tag: Mir.Inst.FixedTag, - final_mir_tag: ?Mir.Inst.FixedTag = null, - patterns: []const Pattern, - }; - - const Op = union(enum) { - /// reuse another operand - implicit: u8, - /// repeat another operand - explicit: u8, - /// a condition code - cc, - /// any general purpose register - gpr, - /// any 64-bit mmx register - mm, - /// any 128-bit sse register - xmm, - /// any 256-bit sse register - ymm, - /// a 64-bit mmx register mask - mm_mask, - /// a 128-bit sse register mask - xmm_mask, - /// a 256-bit sse register mask - ymm_mask, - /// a 64-bit mmx register sign mask - mm_sign_mask, - /// a 128-bit sse register sign mask - xmm_sign_mask, - /// a 256-bit sse register sign mask - ymm_sign_mask, - /// any memory - mem, - /// a limb stored in a general purpose register - gpr_limb, - /// a limb stored in a 64-bit mmx register - mm_limb, - /// a limb stored in a 128-bit sse register - xmm_limb, - /// a limb stored in a 256-bit sse register - ymm_limb, - /// a limb stored in memory - mem_limb, - /// a mutable limb stored in a general purpose register - mut_gpr_limb, - /// a mutable limb stored in memory - mut_mem_limb, - /// an element stored in a condition code - cc_elem, - /// an element stored in a general purpose register - gpr_elem, - /// an element stored in memory - mem_elem, - /// a limb stored in a 64-bit mmx register mask - mm_mask_limb, - /// a limb stored in a 128-bit sse register masuk - xmm_mask_limb, - /// a limb stored in a 256-bit sse register masuk - ymm_mask_limb, - /// specific immediate - imm: i8, - /// any immediate signed extended from 32 bits - simm32, - /// a temp general purpose register containing all ones - umax_gpr, - /// a temp 64-bit mmx register containing all ones - umax_mm, - /// a temp 128-bit sse register containing all ones - umax_xmm, - /// a temp 256-bit sse register 
containing all ones - umax_ymm, - - fn matches(op: Op, is_mut: bool, temp: Temp, cg: *CodeGen) bool { - switch (op) { - .implicit, .explicit, .cc, .cc_elem => unreachable, - else => {}, - // temp is undefined - .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => return true, - } - const temp_ty = temp.typeOf(cg); - const abi_size = temp_ty.abiSize(cg.pt.zcu); - return switch (op) { - .implicit, .explicit, .cc, .cc_elem, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, - .gpr => abi_size <= 8 and switch (temp.tracking(cg).short) { - .register => |reg| reg.class() == .general_purpose, - .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and - reg_off.off == 0, - else => cg.regClassForType(temp_ty) == .general_purpose, - }, - .mm, .mm_mask, .mm_sign_mask => abi_size <= 8 and switch (temp.tracking(cg).short) { - .register => |reg| reg.class() == .mmx, - .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0, - else => cg.regClassForType(temp_ty) == .mmx, - }, - .xmm, .xmm_mask, .xmm_sign_mask => abi_size > 8 and abi_size <= 16 and switch (temp.tracking(cg).short) { - .register => |reg| reg.class() == .sse, - .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, - else => cg.regClassForType(temp_ty) == .sse, - }, - .ymm, .ymm_mask, .ymm_sign_mask => abi_size > 16 and abi_size <= 32 and switch (temp.tracking(cg).short) { - .register => |reg| reg.class() == .sse, - .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, - else => cg.regClassForType(temp_ty) == .sse, - }, - .mem, .mem_limb, .mut_mem_limb, .mem_elem => (!is_mut or temp.isMut(cg)) and temp.tracking(cg).short.isMemory(), - .gpr_limb, .mut_gpr_limb, .gpr_elem => abi_size > 8 and switch (temp.tracking(cg).short) { - .register, .register_pair, .register_triple, .register_quadruple => true, - else => |mcv| mcv.isMemory(), - }, - .mm_limb, .mm_mask_limb => abi_size > 8 and switch (temp.tracking(cg).short) { - inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| { - if (reg.class() != .mmx) break false; - } else true, - else => |mcv| mcv.isMemory() and cg.regClassForType(temp_ty) == .mmx, - }, - .xmm_limb, .xmm_mask_limb => abi_size > 16 and switch (temp.tracking(cg).short) { - inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| { - if (reg.class() != .sse) break false; - } else true, - else => |mcv| mcv.isMemory(), - }, - .ymm_limb, .ymm_mask_limb => abi_size > 32 and switch (temp.tracking(cg).short) { - inline .register_pair, .register_triple, .register_quadruple => |regs| for (regs) |reg| { - if (reg.class() != .sse) break false; - } else true, - else => |mcv| mcv.isMemory(), - }, - .imm => |specific_imm| if (is_mut) unreachable else switch (temp.tracking(cg).short) { - .immediate => |imm| @as(i64, @bitCast(imm)) == specific_imm, - else => false, - }, - .simm32 => if (is_mut) unreachable else switch (temp.tracking(cg).short) { - .immediate => |imm| abi_size <= 4 or std.math.cast(i32, @as(i64, @bitCast(imm))) != null, - else => false, - }, - }; - } - }; - - const Instruction = struct { - mir_tag: Mir.Inst.FixedTag, - operands: [4]Instruction.Operand, - - const Operand = union(enum) { - regb: u8, - regw: u8, - ereg: u8, - rreg: u8, - xmm: u8, - ymm: u8, - }; - }; -}; -const SelectOptions = struct { - cc: ?Condition = null, - invert_result: bool = false, -}; -fn select( +const Select = struct { cg: *CodeGen, - dst_temps: []Temp, - dst_tys: []const Type, - src_temps: []Temp, - 
pattern_sets: []const Pattern.Set, - opts: SelectOptions, -) !void { - var loop: struct { - element_reloc: Mir.Inst.Index, - element_offset: Offset, - element_size: ?u13, - limb_reloc: Mir.Inst.Index, - limb_offset: Offset, - limb_size: ?u8, - shuffle_temp: ?Temp, - mask_limb_temp: ?Temp, - mask_limb_offset: Offset, - mask_limb_offset_lock: ?RegisterLock, - mask_limb_bit_size: ?u7, - mask_store_temp: ?Temp, - mask_store_reg: ?Register, - mask_store_bit_size: ?u7, - remaining_size: ?u64, - - const Offset = union(enum) { - unused, - known: u31, - temp: Temp, - }; - } = .{ - .element_reloc = undefined, - .element_offset = .unused, - .element_size = null, - .limb_reloc = undefined, - .limb_offset = .unused, - .limb_size = null, - .shuffle_temp = null, - .mask_limb_temp = null, - .mask_limb_offset = .unused, - .mask_limb_offset_lock = null, - .mask_limb_bit_size = null, - .mask_store_temp = null, - .mask_store_reg = null, - .mask_store_bit_size = null, - .remaining_size = null, - }; - var extra_temps: [4]?Temp = @splat(null); - pattern_sets: for (pattern_sets) |pattern_set| { - for (pattern_set.required_features) |required_feature| if (!cg.hasFeature(required_feature)) continue :pattern_sets; - for (src_temps) |src_temp| switch (pattern_set.scalar) { - .any => {}, - .bool => if (src_temp.typeOf(cg).scalarType(cg.pt.zcu).toIntern() != .bool_type) continue :pattern_sets, - .float => |size| { - const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); - if (!scalar_ty.isRuntimeFloat()) continue :pattern_sets; - if (scalar_ty.floatBits(cg.target.*) != size.bitSize()) continue :pattern_sets; - }, - .any_int => |size| { - const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); - if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets; - if (scalar_ty.intInfo(cg.pt.zcu).bits > size.bitSize()) continue :pattern_sets; - }, - .signed_int => |size| { - const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); - if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets; - const scalar_info = scalar_ty.intInfo(cg.pt.zcu); - if (scalar_info.signedness != .signed) continue :pattern_sets; - if (scalar_info.bits > size.bitSize()) continue :pattern_sets; - }, - .unsigned_int => |size| { - const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); - if (!scalar_ty.isAbiInt(cg.pt.zcu)) continue :pattern_sets; - const scalar_info = scalar_ty.intInfo(cg.pt.zcu); - if (scalar_info.signedness != .unsigned) continue :pattern_sets; - if (scalar_info.bits > size.bitSize()) continue :pattern_sets; - }, - .any_float_or_int => |size| { - const scalar_ty = src_temp.typeOf(cg).scalarType(cg.pt.zcu); - if (scalar_ty.isRuntimeFloat()) { - if (scalar_ty.floatBits(cg.target.*) != size.bitSize()) continue :pattern_sets; - } else if (scalar_ty.isAbiInt(cg.pt.zcu)) { - if (scalar_ty.intInfo(cg.pt.zcu).bits > size.bitSize()) continue :pattern_sets; - } else continue :pattern_sets; - }, - }; - patterns: for (pattern_set.patterns) |pattern| { - for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { - const ref_src_op, const is_mut = switch (src_op) { - .implicit, .explicit => |linked_index| .{ pattern.ops[linked_index], true }, - .mut_mem_limb, .mut_gpr_limb => .{ src_op, true }, - else => .{ src_op, false }, - }; - if (!ref_src_op.matches(is_mut, src_temp, cg)) continue :patterns; - } - - for (pattern.ops) |op| switch (op) { - else => {}, - .cc_elem, - .mm_mask_limb, - .xmm_mask_limb, - .ymm_mask_limb, - => if (loop.mask_limb_offset_lock == null and !cg.hasFeature(.bmi2)) { - try 
cg.register_manager.getKnownReg(.rcx, null); - loop.mask_limb_offset_lock = cg.register_manager.lockKnownRegAssumeUnused(.rcx); - }, - }; - while (true) for (src_temps, pattern.ops[dst_temps.len..]) |*src_temp, src_op| { - if (switch (switch (src_op) { - .implicit, .explicit => |linked_index| pattern.ops[linked_index], - else => src_op, - }) { - .implicit, .explicit, .cc, .cc_elem => unreachable, - .gpr => try src_temp.toRegClass(true, .general_purpose, cg), - .mm, .mm_mask, .mm_sign_mask => try src_temp.toRegClass(true, .mmx, cg), - .xmm, - .ymm, - .xmm_mask, - .ymm_mask, - .xmm_sign_mask, - .ymm_sign_mask, - => try src_temp.toRegClass(true, .sse, cg), - .mem => try src_temp.toBase(cg), - .imm, .simm32 => false, - .gpr_limb, - .mm_limb, - .xmm_limb, - .ymm_limb, - .mem_limb, - .mut_gpr_limb, - .mut_mem_limb, - .gpr_elem, - .mem_elem, - => switch (src_temp.tracking(cg).short) { - .register, .register_pair, .register_triple, .register_quadruple => false, - else => try src_temp.toBase(cg), - }, - .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => false, - .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => false, - }) break; - } else break; - - const invert_result = opts.invert_result != pattern_set.invert_result; - var dst_is_linked: std.StaticBitSet(4) = .initEmpty(); - var mir_ops_len: usize = 0; - for (pattern.ops[0..dst_temps.len]) |dst_op| switch (dst_op) { - else => mir_ops_len += 1, - .cc, .cc_elem => {}, - }; - const dst_mir_ops_len = mir_ops_len; - for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { - defer mir_ops_len += @intFromBool(src_op != .implicit); - const linked_src_op, const extra_temp = op: switch (src_op) { - .implicit, .explicit => |linked_index| { - if (src_temp.isMut(cg)) { - dst_temps[linked_index] = src_temp; - dst_is_linked.set(linked_index); - } - break :op .{ pattern.ops[linked_index], &extra_temps[linked_index] }; - }, - else => .{ src_op, &extra_temps[mir_ops_len] }, - }; - const limb_size: u8, const rc = switch (linked_src_op) { - else => continue, - .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ @intCast(@divExact(Memory.Size.bitSize(switch (pattern_set.scalar) { - .any => .qword, - .bool => unreachable, - .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, - }), 8)), abi.RegisterClass.gp }, - .mm_limb, .mm_mask_limb => .{ 8, @panic("TODO") }, - .xmm_limb, .xmm_mask_limb => .{ 16, abi.RegisterClass.sse }, - .ymm_limb, .ymm_mask_limb => .{ 32, abi.RegisterClass.sse }, - .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => { - assert(extra_temp.* == null); - extra_temp.* = try cg.tempAllocReg(.noreturn, switch (linked_src_op) { - else => unreachable, - .umax_gpr => abi.RegisterClass.gp, - .umax_mm => @panic("TODO"), - .umax_xmm, .umax_ymm => abi.RegisterClass.sse, - }); - continue; - }, - }; - assert(loop.limb_size == null or loop.limb_size == limb_size); - loop.limb_size = limb_size; - loop.remaining_size = loop.remaining_size orelse src_temp.typeOf(cg).abiSize(cg.pt.zcu); - const src_mcv = src_temp.tracking(cg).short; - switch (src_mcv) { - .register, .register_pair, .register_triple, .register_quadruple => { - switch (loop.limb_offset) { - .unused, .temp => loop.limb_offset = .{ .known = 0 }, - .known => {}, - } - if (switch (linked_src_op) { - .mut_gpr_limb => true, - else => !rc.isSet(RegisterManager.indexOfRegIntoTracked(src_mcv.getRegs()[0]).?), - }) { - if (loop.shuffle_temp == null) loop.shuffle_temp = try cg.tempAllocReg(.noreturn, abi.RegisterClass.sse); - assert(extra_temp.* == null); - extra_temp.* = try 
cg.tempAllocReg(.usize, rc); - } - }, - else => { - switch (loop.limb_offset) { - .unused => loop.limb_offset = .{ .temp = undefined }, - .known, .temp => {}, - } - assert(extra_temp.* == null); - extra_temp.* = try cg.tempAllocReg(.usize, rc); - }, - } - } - for ( - 0.., - dst_temps, - pattern.ops[0..dst_temps.len], - dst_tys, - extra_temps[0..dst_temps.len], - ) |dst_index, *dst_temp, dst_op, dst_ty, *extra_temp| switch (dst_op) { - else => if (!dst_is_linked.isSet(dst_index)) { - dst_temp.* = dst_temp: switch (dst_op) { - .implicit => unreachable, - .explicit => |linked_index| dst_temps[linked_index], - .cc => try cg.tempFromValue(.bool, .{ .eflags = switch (invert_result) { - false => opts.cc.?, - true => opts.cc.?.negate(), - } }), - .gpr => try cg.tempAllocReg(dst_ty, abi.RegisterClass.gp), - .mm, .mm_mask, .mm_sign_mask => @panic("TODO"), - .xmm, .xmm_mask, .xmm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse), - .ymm, .ymm_mask, .ymm_sign_mask => try cg.tempAllocReg(dst_ty, abi.RegisterClass.sse), - .mem => @panic("TODO"), - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mut_gpr_limb, .gpr_elem => { - if (extra_temp.* == null) extra_temp.* = try cg.tempAllocReg(.noreturn, switch (dst_op) { - else => unreachable, - .gpr_limb, .mut_gpr_limb, .gpr_elem => abi.RegisterClass.gp, - .mm_limb => @panic("TODO"), - .xmm_limb, .ymm_limb => abi.RegisterClass.sse, - }); - break :dst_temp try cg.tempAlloc(dst_ty); - }, - .mem_limb, .mut_mem_limb, .mem_elem => try cg.tempAlloc(dst_ty), - .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => unreachable, // already checked - .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination - }; - }, - .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { - const scalar_size = @divExact(Memory.Size.bitSize(switch (pattern_set.scalar) { - .any => .qword, - .bool => unreachable, - .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, - }), 8); - const mask_bit_size = @divExact(loop.remaining_size.?, scalar_size); - const mask_limb_bit_size: u7 = @intCast(@divExact(loop.limb_size.?, scalar_size)); - assert(loop.mask_limb_bit_size == null or loop.mask_limb_bit_size == mask_limb_bit_size); - loop.mask_limb_bit_size = mask_limb_bit_size; - const mask_store_bit_size = mask_store_bit_size: { - // Try to match limb size so that no shifting will be needed. - if (mask_limb_bit_size % 8 == 0) break :mask_store_bit_size mask_limb_bit_size; - // If abi size <= 8 the entire value can be stored at once, - // enabling store forwarding and minimizing store buffer usage. - // Otherwise, we will be performing shifts that need to wrap at - // store size, which for x86 requires 32 or 64, so just pick 64 - // for the same reasons as above. 
- break :mask_store_bit_size @min(mask_bit_size, 64); - }; - assert(loop.mask_store_bit_size == null or loop.mask_store_bit_size == mask_store_bit_size); - loop.mask_store_bit_size = mask_store_bit_size; - loop.mask_limb_offset = loop.limb_offset; - if (loop.mask_limb_temp == null) { - loop.mask_limb_temp = try cg.tempAllocReg(.usize, abi.RegisterClass.gp); - if (dst_op == .cc_elem and mask_store_bit_size > 8) { - // setcc only clears 8 bits - const mask_limb_alias = loop.mask_limb_temp.?.tracking(cg).short.register.to32(); - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister(.{ ._, .xor }, mask_limb_alias, mask_limb_alias); - } - } - if (mask_limb_bit_size < mask_store_bit_size and loop.mask_store_reg == null) { - loop.mask_store_temp = try cg.tempAllocReg(.usize, abi.RegisterClass.gp); - loop.mask_store_reg = loop.mask_store_temp.?.tracking(cg).short.register; - } - dst_temp.* = if (mask_store_bit_size < mask_bit_size) - try cg.tempAllocMem(dst_ty) - else if (loop.mask_store_temp) |mask_store_temp| dst_temp: { - loop.mask_store_temp = null; - break :dst_temp mask_store_temp; - } else try cg.tempAlloc(dst_ty); - }, - }; - switch (loop.mask_limb_offset) { - .unused, .known => {}, - .temp => |*mask_limb_offset| { - mask_limb_offset.* = if (cg.hasFeature(.bmi2)) - try cg.tempAllocReg(.usize, abi.RegisterClass.gp) - else if (loop.mask_limb_offset_lock != null) - try cg.tempFromValue(.usize, .{ .register = .rcx }) - else - unreachable; - if (loop.mask_store_reg) |mask_store_reg| { - const mask_store_alias = - if (loop.mask_store_bit_size.? > 8) mask_store_reg.to32() else mask_store_reg.to8(); - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); - } - }, - } - if (loop.mask_limb_offset_lock) |lock| cg.register_manager.unlockReg(lock); - loop.mask_limb_offset_lock = null; - switch (loop.element_offset) { - .unused, .known => {}, - .temp => |*element_offset| { - element_offset.* = try cg.tempAllocReg(.usize, abi.RegisterClass.gp); - const element_offset_reg = element_offset.tracking(cg).short.register; - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister(.{ ._, .xor }, element_offset_reg.to32(), element_offset_reg.to32()); - loop.element_reloc = @intCast(cg.mir_instructions.len); - }, - } - switch (loop.limb_offset) { - .unused, .known => {}, - .temp => |*limb_offset| limb_offset.* = try cg.tempAllocReg(.usize, abi.RegisterClass.gp), - } - while (true) { - switch (loop.mask_limb_offset) { - .unused, .known => {}, - .temp => |mask_limb_offset| { - const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32(); - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister(.{ ._, .xor }, mask_limb_offset_reg, mask_limb_offset_reg); - }, - } - switch (loop.limb_offset) { - .unused, .known => {}, - .temp => |limb_offset| { - const limb_offset_reg = limb_offset.tracking(cg).short.register.to32(); - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister(.{ ._, .xor }, limb_offset_reg.to32(), limb_offset_reg.to32()); - loop.limb_reloc = @intCast(cg.mir_instructions.len); - }, - } - while (true) { - var mir_ops: [4]Operand = @splat(.none); - mir_ops_len = dst_mir_ops_len; - for (src_temps, pattern.ops[dst_temps.len..]) |src_temp, src_op| { - defer mir_ops_len += @intFromBool(src_op != .implicit); - const mir_op, const linked_src_op, const extra_temp = switch (src_op) { - .implicit => |linked_index| .{ &mir_ops[linked_index], pattern.ops[linked_index], extra_temps[linked_index] }, - .explicit => 
|linked_index| .{ &mir_ops[mir_ops_len], pattern.ops[linked_index], extra_temps[linked_index] }, - else => .{ &mir_ops[mir_ops_len], src_op, extra_temps[mir_ops_len] }, - }; - const src_mcv = switch (linked_src_op) { - else => src_temp, - // src_temp is undefined - .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => extra_temp.?, - }.tracking(cg).short; - switch (linked_src_op) { - else => {}, - .gpr_limb, - .mm_limb, - .xmm_limb, - .ymm_limb, - .mut_gpr_limb, - .gpr_elem, - .mm_mask_limb, - .xmm_mask_limb, - .ymm_mask_limb, - => if (extra_temp) |limb_temp| switch (src_mcv) { - .register, .register_pair, .register_triple, .register_quadruple => { - const limb_reg = registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?); - const src_regs = src_mcv.getRegs(); - const src_reg_size: u32 = @intCast(switch (src_mcv) { - .register => src_temp.typeOf(cg).abiSize(cg.pt.zcu), - else => @divExact(src_regs[0].bitSize(), 8), - }); - const src_reg = src_regs[loop.limb_offset.known / src_reg_size]; - assert(src_mcv == .register or src_reg.bitSize() == 8 * src_reg_size); - switch (src_reg.class()) { - else => unreachable, - .general_purpose => try cg.asmRegisterRegister( - .{ ._, .mov }, - limb_reg, - registerAlias(src_reg, src_reg_size), - ), - .sse => { - assert(src_reg_size == 16); - const limb_alias_size = @max(loop.limb_size.?, 4); - const limb_alias = registerAlias(limb_reg, limb_alias_size); - const src_reg_offset = loop.limb_offset.known % src_reg_size; - switch (limb_reg_offset: { - extr: { - const limb_size = if (cg.hasFeature(.sse4_1)) loop.limb_size.? else 2; - if (loop.limb_size.? > limb_size) break :extr; - const limb_offset = src_reg_offset / limb_size; - if (limb_offset == 0) break :extr; - try cg.asmRegisterRegisterImmediate(.{ switch (limb_size) { - else => unreachable, - 1 => if (cg.hasFeature(.avx)) .vp_b else .p_b, - 2 => if (cg.hasFeature(.avx)) .vp_w else .p_w, - 4 => if (cg.hasFeature(.avx)) .vp_d else .p_d, - 8 => if (cg.hasFeature(.avx)) .vp_q else .p_q, - }, .extr }, limb_alias, src_reg.to128(), .u(limb_offset)); - break :limb_reg_offset src_reg_offset % limb_size; - } - try cg.asmRegisterRegister( - .{ switch (limb_alias_size) { - else => unreachable, - 4 => ._d, - 8 => ._q, - }, .mov }, - limb_alias, - if (src_reg_offset < limb_alias_size) src_reg.to128() else shuffle_reg: { - const shuffle_reg = loop.shuffle_temp.?.tracking(cg).short.register.to128(); - const mir_fixes: Mir.Inst.Fixes = if (cg.hasFeature(.sse2)) - if (src_temp.typeOf(cg).scalarType(cg.pt.zcu).isRuntimeFloat()) switch (limb_alias_size) { - else => unreachable, - 4 => if (cg.hasFeature(.avx)) .v_ps else ._ps, - 8 => if (cg.hasFeature(.avx)) .v_pd else ._pd, - } else if (cg.hasFeature(.avx)) .vp_d else .p_d - else - ._ps; - try cg.asmRegisterRegisterImmediate( - .{ mir_fixes, .shuf }, - shuffle_reg, - src_reg: switch (mir_fixes) { - else => unreachable, - ._ps, ._pd => { - try cg.asmRegisterRegister(.{ mir_fixes, .mova }, shuffle_reg, src_reg.to128()); - break :src_reg shuffle_reg; - }, - .p_d => src_reg.to128(), - }, - .u(switch (mir_fixes) { - else => unreachable, - .v_ps, ._ps, .vp_d, .p_d => switch (limb_alias_size) { - else => unreachable, - 4 => switch (src_reg_offset) { - else => unreachable, - 4...7 => 0b01_01_01_01, - 8...11 => 0b10_10_10_10, - 12...15 => 0b11_11_11_11, - }, - 8 => switch (src_reg_offset) { - else => unreachable, - 8...15 => 0b11_10_11_10, - }, - }, - .v_pd, ._pd => switch (limb_alias_size) { - else => unreachable, - 8 => switch (src_reg_offset) { - else => unreachable, - 
8...15 => 0b1_1, - }, - }, - }), - ); - break :shuffle_reg shuffle_reg; - }, - ); - break :limb_reg_offset src_reg_offset % limb_alias_size; - }) { - 0 => {}, - else => |limb_reg_offset| { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate(.{ ._r, .sh }, limb_alias, .u(limb_reg_offset * 8)); - }, - } - }, - } - }, - else => try cg.asmRegisterMemory( - switch (linked_src_op) { - else => unreachable, - .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ ._, .mov }, - .mm_limb, .mm_mask_limb => .{ ._q, .mov }, - .xmm_limb, - .ymm_limb, - .xmm_mask_limb, - .ymm_mask_limb, - => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, - }, - registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?), - try src_mcv.mem(cg, switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .disp = limb_offset, - }, - .temp => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .index = limb_offset.tracking(cg).short.register.to64(), - }, - }), - ), - }, - } - mir_op.* = switch (linked_src_op) { - .implicit, .explicit, .cc, .cc_elem => unreachable, - .gpr => .{ .reg = registerAlias( - src_mcv.register, - @intCast(src_temp.typeOf(cg).abiSize(cg.pt.zcu)), - ) }, - .umax_gpr => .{ .reg = src_mcv.register.to64() }, // TODO: use other op size? - .mm, .mm_mask, .mm_sign_mask, .umax_mm => .{ .reg = src_mcv.register }, - .xmm, .xmm_mask, .xmm_sign_mask, .umax_xmm => .{ .reg = src_mcv.register.to128() }, - .ymm, .ymm_mask, .ymm_sign_mask, .umax_ymm => .{ .reg = src_mcv.register.to256() }, - .mem => .{ .mem = try src_mcv.mem(cg, .{ .size = cg.memSize(src_temp.typeOf(cg)) }) }, - .gpr_limb, - .mm_limb, - .xmm_limb, - .ymm_limb, - .mut_gpr_limb, - .gpr_elem, - .mm_mask_limb, - .xmm_mask_limb, - .ymm_mask_limb, - => .{ .reg = registerAlias(if (extra_temp) |limb_temp| - limb_temp.tracking(cg).short.register - else - src_mcv.getRegs()[@divExact(loop.limb_offset.known, loop.limb_size.?)], loop.limb_size.?) 
}, - .mem_limb, .mut_mem_limb, .mem_elem => .{ .mem = switch (src_mcv) { - .register, .register_pair, .register_triple, .register_quadruple => unreachable, - else => switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| try src_mcv.mem(cg, .{ - .size = .fromSize(loop.limb_size.?), - .disp = limb_offset, - }), - .temp => |limb_offset| try src_mcv.mem(cg, .{ - .size = .fromSize(loop.limb_size.?), - .index = limb_offset.tracking(cg).short.register.to64(), - }), - }, - } }, - .imm => |imm| .{ .imm = .s(imm) }, - .simm32 => switch (src_temp.typeOf(cg).abiSize(cg.pt.zcu)) { - else => unreachable, - 1 => .{ .imm = if (std.math.cast(i8, @as(i64, @bitCast(src_mcv.immediate)))) |small| - .s(small) - else - .u(@as(u8, @intCast(src_mcv.immediate))) }, - 2 => .{ .imm = if (std.math.cast(i16, @as(i64, @bitCast(src_mcv.immediate)))) |small| - .s(small) - else - .u(@as(u16, @intCast(src_mcv.immediate))) }, - 3...8 => .{ .imm = if (std.math.cast(i32, @as(i64, @bitCast(src_mcv.immediate)))) |small| - .s(small) - else - .u(@as(u32, @intCast(src_mcv.immediate))) }, - }, - }; - switch (src_op) { - else => {}, - .explicit => |linked_index| mir_ops[linked_index] = mir_op.*, - } - } - for ( - mir_ops[0..dst_mir_ops_len], - pattern.ops[0..dst_mir_ops_len], - dst_temps[0..dst_mir_ops_len], - dst_tys[0..dst_mir_ops_len], - extra_temps[0..dst_mir_ops_len], - ) |*mir_op, dst_op, dst_temp, dst_ty, extra_temp| { - if (mir_op.* != .none) continue; - mir_op.* = switch (dst_op) { - .implicit, .cc, .cc_elem => unreachable, - .explicit => |linked_index| mir_ops[linked_index], - .gpr => .{ .reg = registerAlias( - dst_temp.tracking(cg).short.register, - @intCast(dst_ty.abiSize(cg.pt.zcu)), - ) }, - .mm, .mm_mask, .mm_sign_mask => @panic("TODO"), - .xmm, .xmm_mask, .xmm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to128() }, - .ymm, .ymm_mask, .ymm_sign_mask => .{ .reg = dst_temp.tracking(cg).short.register.to256() }, - .mem => @panic("TODO"), - .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ .reg = registerAlias( - extra_temp.?.tracking(cg).short.register, - @intCast(@divExact(Memory.Size.bitSize(switch (pattern_set.scalar) { - .any => .qword, - .bool => unreachable, - .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, - }), 8)), - ) }, - .mm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register }, - .xmm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() }, - .ymm_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() }, - .mem_limb, .mut_mem_limb, .mem_elem => .{ .mem = try dst_temp.tracking(cg).short.mem(cg, switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .disp = limb_offset, - }, - .temp => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .index = limb_offset.tracking(cg).short.register.to64(), - }, - }) }, - .mm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register }, - .xmm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to128() }, - .ymm_mask_limb => .{ .reg = extra_temp.?.tracking(cg).short.register.to256() }, - .imm, .simm32, .umax_gpr, .umax_mm, .umax_xmm, .umax_ymm => unreachable, // unmodifiable destination - }; - } - std.mem.swap(Operand, &mir_ops[pattern.commute[0]], &mir_ops[pattern.commute[1]]); - if (pattern_set.clobbers.eflags) try cg.spillEflagsIfOccupied(); - cg.asmOps((if (loop.remaining_size != null and loop.limb_size != null and - loop.remaining_size.? <= loop.limb_size.?) 
- pattern_set.final_mir_tag - else - null) orelse pattern_set.mir_tag, mir_ops) catch |err| switch (err) { - error.InvalidInstruction => { - const fixes = @tagName(pattern_set.mir_tag[0]); - const fixes_blank = std.mem.indexOfScalar(u8, fixes, '_').?; - return cg.fail( - "invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'", - .{ - fixes[0..fixes_blank], - @tagName(pattern_set.mir_tag[1]), - fixes[fixes_blank + 1 ..], - @tagName(mir_ops[0]), - @tagName(mir_ops[1]), - @tagName(mir_ops[2]), - @tagName(mir_ops[3]), - }, - ); - }, - else => |e| return e, - }; - for ( - extra_temps[0..dst_temps.len], - pattern.ops[0..dst_temps.len], - mir_ops[0..dst_temps.len], - dst_temps, - ) |extra_temp, dst_op, mir_op, dst_temp| switch (dst_op) { - else => if (invert_result) { - try cg.spillEflagsIfOccupied(); - cg.asmOps( - .{ ._, .not }, - .{ mir_op, .none, .none, .none }, - ) catch |err| switch (err) { - error.InvalidInstruction => return cg.fail( - "invalid instruction: 'not {s} none none none'", - .{@tagName(mir_op)}, - ), - else => |e| return e, - }; - }, - .mm_mask, - .xmm_mask, - .ymm_mask, - .mm_sign_mask, - .xmm_sign_mask, - .ymm_sign_mask, - => dst_temp.asMask(.{ - .kind = switch (dst_op) { - else => unreachable, - .mm_mask, .xmm_mask, .ymm_mask => .all, - .mm_sign_mask, .xmm_sign_mask, .ymm_sign_mask => .sign, - }, - .inverted = invert_result, - .scalar = switch (pattern_set.scalar) { - .any, .bool => unreachable, - .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, - }, - }, cg), - .gpr_limb, .mm_limb, .xmm_limb, .ymm_limb, .mut_gpr_limb, .gpr_elem => if (extra_temp) |limb_temp| { - const dst_mcv = dst_temp.tracking(cg).short; - switch (dst_mcv) { - .register_pair, .register_triple, .register_quadruple => try cg.asmRegisterRegister( - .{ ._, .mov }, - dst_mcv.getRegs()[@divExact(loop.limb_offset.known, loop.limb_size.?)].to64(), - limb_temp.tracking(cg).short.register.to64(), - ), - else => try cg.asmMemoryRegister( - switch (dst_op) { - else => unreachable, - .gpr_limb, .mut_gpr_limb, .gpr_elem => .{ ._, .mov }, - .mm_limb => .{ ._q, .mov }, - .xmm_limb, .ymm_limb => .{ if (cg.hasFeature(.avx)) .v_ else ._, .movdqu }, - }, - try dst_mcv.mem(cg, switch (loop.limb_offset) { - .unused => unreachable, - .known => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .disp = limb_offset, - }, - .temp => |limb_offset| .{ - .size = .fromSize(loop.limb_size.?), - .index = limb_offset.tracking(cg).short.register.to64(), - }, - }), - registerAlias(limb_temp.tracking(cg).short.register, loop.limb_size.?), - ), - } - }, - .cc_elem, .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { - const scalar_size = switch (pattern_set.scalar) { - .any => .qword, - .bool => unreachable, - .float, .any_int, .signed_int, .unsigned_int, .any_float_or_int => |size| size, - }; - const mask_store_size: u4 = - @intCast(std.math.divCeil(u7, loop.mask_store_bit_size.?, 8) catch unreachable); - const known_shl_count = if (loop.mask_store_reg) |_| switch (loop.mask_limb_offset) { - .unused => unreachable, - .known => |mask_limb_offset| mask_limb_offset & (loop.mask_store_bit_size.? 
- 1), - .temp => null, - } else null; - const mask_limb_reg = registerAlias(if (known_shl_count != 0) - loop.mask_limb_temp.?.tracking(cg).short.register - else - loop.mask_store_reg.?, mask_store_size); - switch (dst_op) { - else => unreachable, - .cc_elem => try cg.asmSetccRegister(switch (invert_result) { - false => opts.cc.?, - true => opts.cc.?.negate(), - }, mask_limb_reg.to8()), - .mm_mask_limb, .xmm_mask_limb, .ymm_mask_limb => { - if (scalar_size == .word) if (cg.hasFeature(.avx)) try cg.asmRegisterRegisterRegister( - .{ .vp_b, .ackssw }, - mir_op.reg, - mir_op.reg, - mir_op.reg, - ) else try cg.asmRegisterRegister( - .{ .p_b, .ackssw }, - mir_op.reg, - mir_op.reg, - ); - try cg.asmRegisterRegister(switch (scalar_size) { - else => unreachable, - .byte, .word => .{ if (cg.hasFeature(.avx)) .vp_b else .p_b, .movmsk }, - .dword => .{ if (cg.hasFeature(.avx)) .v_ps else ._ps, .movmsk }, - .qword => .{ if (cg.hasFeature(.avx)) .v_pd else ._pd, .movmsk }, - }, mask_limb_reg.to32(), mir_op.reg); - if (invert_result) if (loop.mask_store_reg) |_| { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate( - .{ ._, .xor }, - registerAlias(mask_limb_reg, @min(mask_store_size, 4)), - .u((@as(u32, 1) << @intCast(loop.mask_limb_bit_size.?)) - 1), - ); - } else try cg.asmRegister(.{ ._, .not }, mask_limb_reg); - }, - } - if (loop.mask_store_reg) |mask_store_reg| { - const mask_store_alias = registerAlias(mask_store_reg, mask_store_size); - switch (loop.mask_limb_offset) { - .unused => unreachable, - .known => if (known_shl_count.? != 0) { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate(.{ ._l, .sh }, mask_limb_reg, .u(known_shl_count.?)); - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg); - }, - .temp => |mask_limb_offset| { - if (cg.hasFeature(.bmi2)) { - const shlx_size = @max(mask_store_size, 4); - const shlx_mask_limb_reg = registerAlias(mask_limb_reg, shlx_size); - try cg.asmRegisterRegisterRegister( - .{ ._lx, .sh }, - shlx_mask_limb_reg, - shlx_mask_limb_reg, - registerAlias(mask_limb_offset.tracking(cg).short.register, shlx_size), - ); - } else { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister( - .{ ._l, .sh }, - mask_limb_reg, - mask_limb_offset.tracking(cg).short.register.to8(), - ); - } - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegister(.{ ._, .@"or" }, mask_store_alias, mask_limb_reg); - }, - } - } - const dst_mcv = dst_temp.tracking(cg).short; - switch (loop.mask_limb_offset) { - .unused => unreachable, - .known => |*mask_limb_offset| { - mask_limb_offset.* += loop.mask_limb_bit_size.?; - if (mask_limb_offset.* & (loop.mask_store_bit_size.? 
- 1) == 0) switch (dst_mcv) { - .register => {}, - else => { - try cg.asmMemoryRegister( - .{ ._, .mov }, - try dst_mcv.mem(cg, .{ - .size = .fromSize(mask_store_size), - .disp = @divExact(mask_limb_offset.*, 8) - mask_store_size, - }), - registerAlias(loop.mask_store_reg orelse mask_limb_reg, mask_store_size), - ); - if (loop.mask_store_reg) |mask_store_reg| { - const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4)); - try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); - } - }, - }; - }, - .temp => |mask_limb_offset| { - const mask_limb_offset_reg = mask_limb_offset.tracking(cg).short.register.to32(); - if (loop.mask_store_reg) |mask_store_reg| { - try cg.asmRegisterMemory(.{ ._, .lea }, mask_limb_offset_reg, .{ - .base = .{ .reg = mask_limb_offset_reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = loop.mask_limb_bit_size.?, - } }, - }); - switch (dst_mcv) { - .register => {}, - else => { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate( - .{ ._, .@"test" }, - mask_limb_offset_reg, - .u(loop.mask_store_bit_size.? - 1), - ); - const skip_store_reloc = try cg.asmJccReloc(.nz, undefined); - const mask_store_offset_reg = mask_limb_reg.to32(); - try cg.asmRegisterRegister(.{ ._, .mov }, mask_store_offset_reg, mask_limb_offset_reg); - try cg.asmRegisterImmediate(.{ ._r, .sh }, mask_store_offset_reg, .u(3)); - try cg.asmMemoryRegister(.{ ._, .mov }, try dst_mcv.mem(cg, .{ - .size = .fromSize(mask_store_size), - .index = mask_store_offset_reg.to64(), - .disp = -@as(i8, mask_store_size), - }), registerAlias(mask_store_reg, mask_store_size)); - const mask_store_alias = registerAlias(mask_store_reg, @min(mask_store_size, 4)); - try cg.asmRegisterRegister(.{ ._, .xor }, mask_store_alias, mask_store_alias); - cg.performReloc(skip_store_reloc); - }, - } - } else { - switch (dst_mcv) { - .register => {}, - else => try cg.asmMemoryRegister(.{ ._, .mov }, try dst_mcv.mem(cg, .{ - .size = .fromSize(mask_store_size), - .index = mask_limb_offset_reg.to64(), - }), mask_limb_reg), - } - try cg.asmRegisterMemory(.{ ._, .lea }, mask_limb_offset_reg, .{ - .base = .{ .reg = mask_limb_offset_reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = mask_store_size, - } }, - }); - } - }, - } - }, - }; - switch (pattern_set.loop) { - .once => break :pattern_sets, - .bitwise => {}, - .limbwise_carry => @panic("TODO"), - .limbwise_reduce => @panic("TODO"), - .limbwise_pairs_forward => @panic("TODO"), - .limbwise_pairs_reverse => @panic("TODO"), - .elementwise => {}, - } - switch (loop.limb_offset) { - .unused => break, - .known => |*limb_offset| { - limb_offset.* += loop.limb_size.?; - loop.remaining_size.? -= loop.limb_size.?; - if (loop.remaining_size.? < loop.limb_size.? 
or - (loop.element_size != null and limb_offset.* >= loop.element_size.?)) - { - switch (loop.mask_limb_offset) { - .unused => {}, - .known => |*mask_limb_offset| mask_limb_offset.* = 0, - .temp => unreachable, - } - limb_offset.* = 0; - break; - } - }, - .temp => |limb_offset| { - const limb_offset_reg = limb_offset.tracking(cg).short.register; - try cg.asmRegisterMemory(.{ ._, .lea }, limb_offset_reg.to32(), .{ - .base = .{ .reg = limb_offset_reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = loop.limb_size.?, - } }, - }); - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate( - .{ ._, .cmp }, - limb_offset_reg.to32(), - .u(loop.element_size orelse loop.remaining_size.?), - ); - _ = try cg.asmJccReloc(.b, loop.limb_reloc); - try limb_offset.die(cg); - break; - }, - } - } - if (loop.shuffle_temp) |shuffle_temp| try shuffle_temp.die(cg); - if (loop.mask_limb_temp) |mask_limb_temp| try mask_limb_temp.die(cg); - if (loop.mask_store_temp) |mask_store_temp| try mask_store_temp.die(cg); - switch (loop.mask_limb_offset) { - .unused, .known => {}, - .temp => |mask_limb_offset| try mask_limb_offset.die(cg), - } - switch (loop.element_offset) { - .unused => break :pattern_sets, - .known => |*element_offset| { - if (loop.remaining_size.? == 0) break :pattern_sets; - element_offset.* += loop.element_size.?; - }, - .temp => |element_offset| { - if (true) @panic("TODO"); - try element_offset.die(cg); - if (loop.remaining_size.? == 0) break :pattern_sets; - break; - }, - } - } - } - } else { - log.err("failed to select {s}:", .{@tagName(pattern_sets[0].mir_tag[1])}); - for (src_temps) |src_temp| log.err("{}", .{src_temp.tracking(cg)}); - return error.SelectFailed; - } - for (extra_temps) |extra_temp| if (extra_temp) |temp| try temp.die(cg); -} - -const Select2 = struct { - cg: *CodeGen, - case: *const Case, - pattern: *const Select2.Pattern, - extra_temps: [3]Temp, - dst_temps: []const Temp, - src_temps: []const Temp, - commute: struct { u8, u8 }, + temps: [@intFromEnum(Select.Operand.Ref.none)]Temp, + labels: [@intFromEnum(Label._)]struct { + backward: ?Mir.Inst.Index, + forward: [1]?Mir.Inst.Index, + }, limb: Memory.Mod.Rm, + mask_limb: Memory.Mod.Rm, - fn emit(s: Select2, inst: Instruction) !void { - const mir_tag: Mir.Inst.FixedTag = .{ inst[0], inst[1] }; + fn emitLabel(s: *Select, label_index: Label) void { + if (label_index == ._) return; + const label = &s.labels[@intFromEnum(label_index)]; + for (&label.forward) |*reloc| { + if (reloc.*) |r| s.cg.performReloc(r); + reloc.* = null; + } + label.backward = @intCast(s.cg.mir_instructions.len); + } + + fn emit(s: *Select, inst: Instruction) !void { + s.emitLabel(inst[0]); + const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] }; var mir_ops: [4]CodeGen.Operand = undefined; - inline for (&mir_ops, 2..) |*mir_op, inst_index| mir_op.* = try inst[inst_index].lower(s); + inline for (&mir_ops, 3..) 
|*mir_op, inst_index| mir_op.* = try inst[inst_index].lower(s); s.cg.asmOps(mir_tag, mir_ops) catch |err| switch (err) { error.InvalidInstruction => { const fixes = @tagName(mir_tag[0]); @@ -23881,32 +24102,27 @@ const Select2 = struct { }; } - fn lowerLimb(s: Select2, temp: Temp) !CodeGen.Operand { - return .{ .mem = try temp.tracking(s.cg).short.mem(s.cg, s.limb) }; - } - - fn srcTemp(s: Select2, index: u8) Temp { - return s.src_temps[ - if (index == s.commute[0]) - s.commute[1] - else if (index == s.commute[1]) - s.commute[0] - else - index - ]; - } - const Case = struct { required_features: [2]?std.Target.x86.Feature = @splat(null), - constraints: [2]Constraint = @splat(.any), - patterns: []const Select2.Pattern, + dst_constraints: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]Constraint = @splat(.any), + src_constraints: [@intFromEnum(Select.Operand.Ref.none) - @intFromEnum(Select.Operand.Ref.src0)]Constraint = @splat(.any), + patterns: []const Select.Pattern, clobbers: struct { eflags: bool = false } = .{}, - extra_temps: [3]TempSpec = @splat(.unused), - dst_temps: [1]TempSpec.Kind = @splat(.unused), + extra_temps: [@intFromEnum(Select.Operand.Ref.dst0) - @intFromEnum(Select.Operand.Ref.tmp0)]TempSpec = @splat(.unused), + dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused), each: union(enum) { once: []const Instruction, limb: struct { - of: Select2.Operand, + of: Select.Operand.Ref.Sized, + header: []const Instruction = &.{}, + first: ?[]const Instruction = null, + body: []const Instruction, + last: ?[]const Instruction = null, + trailer: []const Instruction = &.{}, + }, + limb_and_mask_limb: struct { + of: Select.Operand.Ref.Sized, + of_mask: Select.Operand.Ref.Sized, header: []const Instruction = &.{}, first: ?[]const Instruction = null, body: []const Instruction, @@ -23918,6 +24134,7 @@ const Select2 = struct { const Constraint = union(enum) { any, + any_bool_vec, any_int, any_float, bool_vec: Memory.Size, @@ -23925,37 +24142,35 @@ const Select2 = struct { signed_int: Memory.Size, unsigned_int: Memory.Size, - fn accepts(constraint: Constraint, temp: Temp, cg: *CodeGen) bool { + fn accepts(constraint: Constraint, ty: Type, cg: *CodeGen) bool { const zcu = cg.pt.zcu; switch (constraint) { .any => return true, + .any_bool_vec => return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type, .any_int => { - const scalar_ty = temp.typeOf(cg).scalarType(zcu); + const scalar_ty = ty.scalarType(zcu); return scalar_ty.isAbiInt(zcu) or scalar_ty.isPtrAtRuntime(zcu); }, - .any_float => return temp.typeOf(cg).scalarType(zcu).isRuntimeFloat(), - .bool_vec => |size| { - const ty = temp.typeOf(cg); - return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and - ty.vectorLen(zcu) <= size.bitSize(); - }, + .any_float => return ty.scalarType(zcu).isRuntimeFloat(), + .bool_vec => |size| return ty.isVector(zcu) and + ty.scalarType(zcu).toIntern() == .bool_type and ty.vectorLen(zcu) <= size.bitSize(cg.target), .int => |size| { - const scalar_ty = temp.typeOf(cg).scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(); - return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).bits <= size.bitSize(); + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target); + return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).bits <= size.bitSize(cg.target); }, 
.signed_int => |size| { - const scalar_ty = temp.typeOf(cg).scalarType(zcu); + const scalar_ty = ty.scalarType(zcu); if (!scalar_ty.isAbiInt(zcu)) return false; const info = scalar_ty.intInfo(zcu); - return info.signedness == .signed and info.bits <= size.bitSize(); + return info.signedness == .signed and info.bits <= size.bitSize(cg.target); }, .unsigned_int => |size| { - const scalar_ty = temp.typeOf(cg).scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(); + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target); if (!scalar_ty.isAbiInt(zcu)) return false; const info = scalar_ty.intInfo(zcu); - return info.signedness == .unsigned and info.bits <= size.bitSize(); + return info.signedness == .unsigned and info.bits <= size.bitSize(cg.target); }, } } @@ -24083,189 +24298,578 @@ const Select2 = struct { rc: Register.Class, rc_mask: struct { rc: Register.Class, info: MaskInfo }, mem, - src: u8, - src_mask: struct { src: u8, info: MaskInfo }, + ref: Select.Operand.Ref, + ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo }, - fn finish(kind: Kind, temp: Temp, s: Select2) void { + fn finish(kind: Kind, temp: Temp, s: *const Select) void { switch (kind) { else => {}, - inline .rc_mask, .src_mask => |mask| temp.asMask(mask.info, s.cg), + inline .rc_mask, .ref_mask => |mask| temp.asMask(mask.info, s.cg), } } }; - fn create(spec: TempSpec, s: Select2) !?Temp { + fn create(spec: TempSpec, s: *Select) !?Temp { return switch (spec.kind) { .unused => null, .any => try s.cg.tempAlloc(spec.type), .cc => |cc| try s.cg.tempFromValue(spec.type, .{ .eflags = cc }), .reg => |reg| try s.cg.tempFromValue(spec.type, .{ .register = reg }), .rc => |rc| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc)), - .rc_mask => |mask| try s.cg.tempAllocReg(spec.type, regSetForRegClass(mask.rc)), + .rc_mask => |rc_mask| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)), .mem => try s.cg.tempAllocMem(spec.type), - .src => |src| s.srcTemp(src), - .src_mask => |mask| s.srcTemp(mask.src), + .ref => |ref| ref.deref(s), + .ref_mask => |ref_mask| ref_mask.ref.deref(s), }; } }; const Instruction = struct { + Label, Mir.Inst.Fixes, Mir.Inst.Tag, - Select2.Operand, - Select2.Operand, - Select2.Operand, - Select2.Operand, + Select.Operand, + Select.Operand, + Select.Operand, + Select.Operand, }; - const Operand = union(enum) { - none, - extra: struct { Memory.Size, u8 }, - dst: struct { Memory.Size, u8 }, - src: struct { Memory.Size, u8 }, - dst_limb: u8, - src_limb: u8, - simm32: i32, + const Label = enum { @"0:", @"1:", @"_" }; + const Operand = struct { + tag: Tag, + base: Ref.Sized = .none, + index: packed struct(u6) { + ref: Ref, + scale: Memory.Scale, + } = .{ .ref = .none, .scale = .@"1" }, + adjust: Adjust = .none, + imm: i32 = 0, - const tmp0b: Select2.Operand = .{ .extra = .{ .byte, 0 } }; - const tmp0w: Select2.Operand = .{ .extra = .{ .word, 0 } }; - const etmp0: Select2.Operand = .{ .extra = .{ .dword, 0 } }; - const rtmp0: Select2.Operand = .{ .extra = .{ .qword, 0 } }; - const xtmp0: Select2.Operand = .{ .extra = .{ .xword, 0 } }; - const ytmp0: Select2.Operand = .{ .extra = .{ .yword, 0 } }; + const Tag = enum { + none, + backward_label, + forward_label, + ref, + limb, + mask_limb, + simm, + lea, + mem, + }; + const Adjust = enum { + none, + add_access_size, + sub_access_size, + add_size, + sub_size, + add_bit_size, + sub_bit_size, + add_limbs, + sub_limbs, + add_len, 
+ sub_len, + add_elem_size, + sub_elem_size, + add_elem_limbs, + sub_elem_limbs, + }; + const Ref = enum(u4) { + tmp0, + tmp1, + tmp2, + tmp3, + tmp4, + tmp5, + dst0, + src0, + src1, + none, - const tmp1b: Select2.Operand = .{ .extra = .{ .byte, 1 } }; - const tmp1w: Select2.Operand = .{ .extra = .{ .word, 1 } }; - const etmp1: Select2.Operand = .{ .extra = .{ .dword, 1 } }; - const rtmp1: Select2.Operand = .{ .extra = .{ .qword, 1 } }; - const xtmp1: Select2.Operand = .{ .extra = .{ .xword, 1 } }; - const ytmp1: Select2.Operand = .{ .extra = .{ .yword, 1 } }; + const Sized = packed struct(u8) { + ref: Ref, + size: Memory.Size, - const tmp2b: Select2.Operand = .{ .extra = .{ .byte, 2 } }; - const tmp2w: Select2.Operand = .{ .extra = .{ .word, 2 } }; - const etmp2: Select2.Operand = .{ .extra = .{ .dword, 2 } }; - const rtmp2: Select2.Operand = .{ .extra = .{ .qword, 2 } }; - const xtmp2: Select2.Operand = .{ .extra = .{ .xword, 2 } }; - const ytmp2: Select2.Operand = .{ .extra = .{ .yword, 2 } }; + const none: Sized = .{ .ref = .none, .size = .none }; - const dst0b: Select2.Operand = .{ .dst = .{ .byte, 0 } }; - const dst0w: Select2.Operand = .{ .dst = .{ .word, 0 } }; - const edst0: Select2.Operand = .{ .dst = .{ .dword, 0 } }; - const rdst0: Select2.Operand = .{ .dst = .{ .qword, 0 } }; - const xdst0: Select2.Operand = .{ .dst = .{ .xword, 0 } }; - const ydst0: Select2.Operand = .{ .dst = .{ .yword, 0 } }; + const tmp0: Sized = .{ .ref = .tmp0, .size = .none }; + const tmp0b: Sized = .{ .ref = .tmp0, .size = .byte }; + const tmp0w: Sized = .{ .ref = .tmp0, .size = .word }; + const tmp0d: Sized = .{ .ref = .tmp0, .size = .dword }; + const tmp0p: Sized = .{ .ref = .tmp0, .size = .ptr }; + const tmp0q: Sized = .{ .ref = .tmp0, .size = .qword }; + const tmp0x: Sized = .{ .ref = .tmp0, .size = .xword }; + const tmp0y: Sized = .{ .ref = .tmp0, .size = .yword }; - const src0b: Select2.Operand = .{ .src = .{ .byte, 0 } }; - const src0w: Select2.Operand = .{ .src = .{ .word, 0 } }; - const esrc0: Select2.Operand = .{ .src = .{ .dword, 0 } }; - const rsrc0: Select2.Operand = .{ .src = .{ .qword, 0 } }; - const xsrc0: Select2.Operand = .{ .src = .{ .xword, 0 } }; - const ysrc0: Select2.Operand = .{ .src = .{ .yword, 0 } }; + const tmp1: Sized = .{ .ref = .tmp1, .size = .none }; + const tmp1b: Sized = .{ .ref = .tmp1, .size = .byte }; + const tmp1w: Sized = .{ .ref = .tmp1, .size = .word }; + const tmp1d: Sized = .{ .ref = .tmp1, .size = .dword }; + const tmp1p: Sized = .{ .ref = .tmp1, .size = .ptr }; + const tmp1q: Sized = .{ .ref = .tmp1, .size = .qword }; + const tmp1x: Sized = .{ .ref = .tmp1, .size = .xword }; + const tmp1y: Sized = .{ .ref = .tmp1, .size = .yword }; - const src1b: Select2.Operand = .{ .src = .{ .byte, 1 } }; - const src1w: Select2.Operand = .{ .src = .{ .word, 1 } }; - const esrc1: Select2.Operand = .{ .src = .{ .dword, 1 } }; - const rsrc1: Select2.Operand = .{ .src = .{ .qword, 1 } }; - const xsrc1: Select2.Operand = .{ .src = .{ .xword, 1 } }; - const ysrc1: Select2.Operand = .{ .src = .{ .yword, 1 } }; + const tmp2: Sized = .{ .ref = .tmp2, .size = .none }; + const tmp2b: Sized = .{ .ref = .tmp2, .size = .byte }; + const tmp2w: Sized = .{ .ref = .tmp2, .size = .word }; + const tmp2d: Sized = .{ .ref = .tmp2, .size = .dword }; + const tmp2p: Sized = .{ .ref = .tmp2, .size = .ptr }; + const tmp2q: Sized = .{ .ref = .tmp2, .size = .qword }; + const tmp2x: Sized = .{ .ref = .tmp2, .size = .xword }; + const tmp2y: Sized = .{ .ref = .tmp2, .size = .yword }; - fn unwrap(op: 
Select2.Operand, s: Select2) struct { Memory.Size, Temp } { - return switch (op) { - else => unreachable, - .extra => |extra| .{ extra[0], s.extra_temps[extra[1]] }, - .dst => |dst| .{ dst[0], s.dst_temps[dst[1]] }, - .src => |src| .{ src[0], s.srcTemp(src[1]) }, + const tmp3: Sized = .{ .ref = .tmp3, .size = .none }; + const tmp3b: Sized = .{ .ref = .tmp3, .size = .byte }; + const tmp3w: Sized = .{ .ref = .tmp3, .size = .word }; + const tmp3d: Sized = .{ .ref = .tmp3, .size = .dword }; + const tmp3p: Sized = .{ .ref = .tmp3, .size = .ptr }; + const tmp3q: Sized = .{ .ref = .tmp3, .size = .qword }; + const tmp3x: Sized = .{ .ref = .tmp3, .size = .xword }; + const tmp3y: Sized = .{ .ref = .tmp3, .size = .yword }; + + const tmp4: Sized = .{ .ref = .tmp4, .size = .none }; + const tmp4b: Sized = .{ .ref = .tmp4, .size = .byte }; + const tmp4w: Sized = .{ .ref = .tmp4, .size = .word }; + const tmp4d: Sized = .{ .ref = .tmp4, .size = .dword }; + const tmp4p: Sized = .{ .ref = .tmp4, .size = .ptr }; + const tmp4q: Sized = .{ .ref = .tmp4, .size = .qword }; + const tmp4x: Sized = .{ .ref = .tmp4, .size = .xword }; + const tmp4y: Sized = .{ .ref = .tmp4, .size = .yword }; + + const dst0: Sized = .{ .ref = .dst0, .size = .none }; + const dst0b: Sized = .{ .ref = .dst0, .size = .byte }; + const dst0w: Sized = .{ .ref = .dst0, .size = .word }; + const dst0d: Sized = .{ .ref = .dst0, .size = .dword }; + const dst0p: Sized = .{ .ref = .dst0, .size = .ptr }; + const dst0q: Sized = .{ .ref = .dst0, .size = .qword }; + const dst0x: Sized = .{ .ref = .dst0, .size = .xword }; + const dst0y: Sized = .{ .ref = .dst0, .size = .yword }; + + const src0: Sized = .{ .ref = .src0, .size = .none }; + const src0b: Sized = .{ .ref = .src0, .size = .byte }; + const src0w: Sized = .{ .ref = .src0, .size = .word }; + const src0d: Sized = .{ .ref = .src0, .size = .dword }; + const src0p: Sized = .{ .ref = .src0, .size = .ptr }; + const src0q: Sized = .{ .ref = .src0, .size = .qword }; + const src0x: Sized = .{ .ref = .src0, .size = .xword }; + const src0y: Sized = .{ .ref = .src0, .size = .yword }; + + const src1: Sized = .{ .ref = .src1, .size = .none }; + const src1b: Sized = .{ .ref = .src1, .size = .byte }; + const src1w: Sized = .{ .ref = .src1, .size = .word }; + const src1d: Sized = .{ .ref = .src1, .size = .dword }; + const src1p: Sized = .{ .ref = .src1, .size = .ptr }; + const src1q: Sized = .{ .ref = .src1, .size = .qword }; + const src1x: Sized = .{ .ref = .src1, .size = .xword }; + const src1y: Sized = .{ .ref = .src1, .size = .yword }; + }; + + fn deref(ref: Ref, s: *const Select) Temp { + return s.temps[@intFromEnum(ref)]; + } + }; + + const @"_": Select.Operand = .{ .tag = .none }; + + const @"0b": Select.Operand = .{ .tag = .backward_label, .base = .{ .ref = .tmp0, .size = .none } }; + const @"0f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp0, .size = .none } }; + const @"1b": Select.Operand = .{ .tag = .backward_label, .base = .{ .ref = .tmp1, .size = .none } }; + const @"1f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp1, .size = .none } }; + + const tmp0b: Select.Operand = .{ .tag = .ref, .base = .tmp0b }; + const tmp0w: Select.Operand = .{ .tag = .ref, .base = .tmp0w }; + const tmp0d: Select.Operand = .{ .tag = .ref, .base = .tmp0d }; + const tmp0p: Select.Operand = .{ .tag = .ref, .base = .tmp0p }; + const tmp0q: Select.Operand = .{ .tag = .ref, .base = .tmp0q }; + const tmp0x: Select.Operand = .{ .tag = .ref, .base = .tmp0x }; + const tmp0y: Select.Operand = 
.{ .tag = .ref, .base = .tmp0y }; + + const tmp1b: Select.Operand = .{ .tag = .ref, .base = .tmp1b }; + const tmp1w: Select.Operand = .{ .tag = .ref, .base = .tmp1w }; + const tmp1d: Select.Operand = .{ .tag = .ref, .base = .tmp1d }; + const tmp1p: Select.Operand = .{ .tag = .ref, .base = .tmp1p }; + const tmp1q: Select.Operand = .{ .tag = .ref, .base = .tmp1q }; + const tmp1x: Select.Operand = .{ .tag = .ref, .base = .tmp1x }; + const tmp1y: Select.Operand = .{ .tag = .ref, .base = .tmp1y }; + + const tmp2b: Select.Operand = .{ .tag = .ref, .base = .tmp2b }; + const tmp2w: Select.Operand = .{ .tag = .ref, .base = .tmp2w }; + const tmp2d: Select.Operand = .{ .tag = .ref, .base = .tmp2d }; + const tmp2p: Select.Operand = .{ .tag = .ref, .base = .tmp2p }; + const tmp2q: Select.Operand = .{ .tag = .ref, .base = .tmp2q }; + const tmp2x: Select.Operand = .{ .tag = .ref, .base = .tmp2x }; + const tmp2y: Select.Operand = .{ .tag = .ref, .base = .tmp2y }; + + const tmp3b: Select.Operand = .{ .tag = .ref, .base = .tmp3b }; + const tmp3w: Select.Operand = .{ .tag = .ref, .base = .tmp3w }; + const tmp3d: Select.Operand = .{ .tag = .ref, .base = .tmp3d }; + const tmp3p: Select.Operand = .{ .tag = .ref, .base = .tmp3p }; + const tmp3q: Select.Operand = .{ .tag = .ref, .base = .tmp3q }; + const tmp3x: Select.Operand = .{ .tag = .ref, .base = .tmp3x }; + const tmp3y: Select.Operand = .{ .tag = .ref, .base = .tmp3y }; + + const tmp4b: Select.Operand = .{ .tag = .ref, .base = .tmp4b }; + const tmp4w: Select.Operand = .{ .tag = .ref, .base = .tmp4w }; + const tmp4d: Select.Operand = .{ .tag = .ref, .base = .tmp4d }; + const tmp4p: Select.Operand = .{ .tag = .ref, .base = .tmp4p }; + const tmp4q: Select.Operand = .{ .tag = .ref, .base = .tmp4q }; + const tmp4x: Select.Operand = .{ .tag = .ref, .base = .tmp4x }; + const tmp4y: Select.Operand = .{ .tag = .ref, .base = .tmp4y }; + + const dst0b: Select.Operand = .{ .tag = .ref, .base = .dst0b }; + const dst0w: Select.Operand = .{ .tag = .ref, .base = .dst0w }; + const dst0d: Select.Operand = .{ .tag = .ref, .base = .dst0d }; + const dst0p: Select.Operand = .{ .tag = .ref, .base = .dst0p }; + const dst0q: Select.Operand = .{ .tag = .ref, .base = .dst0q }; + const dst0x: Select.Operand = .{ .tag = .ref, .base = .dst0x }; + const dst0y: Select.Operand = .{ .tag = .ref, .base = .dst0y }; + + const src0b: Select.Operand = .{ .tag = .ref, .base = .src0b }; + const src0w: Select.Operand = .{ .tag = .ref, .base = .src0w }; + const src0d: Select.Operand = .{ .tag = .ref, .base = .src0d }; + const src0p: Select.Operand = .{ .tag = .ref, .base = .src0p }; + const src0q: Select.Operand = .{ .tag = .ref, .base = .src0q }; + const src0x: Select.Operand = .{ .tag = .ref, .base = .src0x }; + const src0y: Select.Operand = .{ .tag = .ref, .base = .src0y }; + + const src1b: Select.Operand = .{ .tag = .ref, .base = .src1b }; + const src1w: Select.Operand = .{ .tag = .ref, .base = .src1w }; + const src1d: Select.Operand = .{ .tag = .ref, .base = .src1d }; + const src1p: Select.Operand = .{ .tag = .ref, .base = .src1p }; + const src1q: Select.Operand = .{ .tag = .ref, .base = .src1q }; + const src1x: Select.Operand = .{ .tag = .ref, .base = .src1x }; + const src1y: Select.Operand = .{ .tag = .ref, .base = .src1y }; + + fn limb(ref: Ref.Sized) Select.Operand { + return .{ .tag = .limb, .base = ref }; + } + fn maskLimb(ref: Ref.Sized) Select.Operand { + return .{ .tag = .mask_limb, .base = ref }; + } + + fn i(imm: i32) Select.Operand { + return .{ .tag = .simm, .imm = imm }; + } + fn 
a(base: Ref.Sized, adjust: Adjust) Select.Operand { + return .{ .tag = .simm, .base = base, .adjust = adjust }; + } + fn ia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand { + return .{ .tag = .simm, .base = base, .adjust = adjust, .imm = imm }; + } + + fn lea(size: Memory.Size, base: Ref) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + }; + } + fn lead(size: Memory.Size, base: Ref, disp: i32) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .imm = disp, + }; + } + fn leai(size: Memory.Size, base: Ref, index: Ref) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .index_ = .{ .ref = index, .scale = .@"1" }, + }; + } + fn leaid(size: Memory.Size, base: Ref, index: Ref, disp: i32) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .index_ = .{ .ref = index, .scale = .@"1" }, + .imm = disp, + }; + } + fn leasi(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .index_ = .{ .ref = index, .scale = scale }, + }; + } + fn leasid(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .index_ = .{ .ref = index, .scale = scale }, + .imm = disp, + }; + } + fn leasida(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .index_ = .{ .ref = index, .scale = scale }, + .adjust = adjust, + .imm = disp, }; } - fn lower(op: Select2.Operand, s: Select2) !CodeGen.Operand { - switch (op) { - .none => return .none, - else => {}, - .dst_limb => |dst| return s.lowerLimb(s.dst_temps[dst]), - .src_limb => |src| return s.lowerLimb(s.srcTemp(src)), - .simm32 => |imm| return .{ .imm = .s(imm) }, - } - const size, const temp = op.unwrap(s); - return switch (temp.tracking(s.cg).short) { - .immediate => |imm| .{ .imm = switch (size) { - .byte => if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u8, @intCast(imm))), - .word => if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u16, @intCast(imm))), - .dword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u32, @intCast(imm))), - .qword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(imm), - else => unreachable, + fn mem(base: Ref.Sized) Select.Operand { + return .{ + .tag = .mem, + .base = base, + }; + } + fn memd(base: Ref.Sized, disp: i32) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .imm = disp, + }; + } + fn memi(base: Ref.Sized, index: Ref) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = .@"1" }, + }; + } + fn memia(base: Ref.Sized, index: Ref, adjust: Adjust) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = .@"1" }, + .adjust = adjust, + }; + } + fn memid(base: Ref.Sized, index: Ref, disp: i32) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = .@"1" }, + .imm = disp, + }; + } + fn memsi(base: Ref.Sized, scale: Memory.Scale, index: Ref) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = scale }, + }; + } + fn memsid(base: Ref.Sized, scale: Memory.Scale, index: Ref, 
disp: i32) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = scale }, + .imm = disp, + }; + } + fn memsida(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = scale }, + .adjust = adjust, + .imm = disp, + }; + } + + fn adjustedImm(op: Select.Operand, s: *const Select) i32 { + return switch (op.adjust) { + .none => op.imm, + .add_access_size => op.imm + @as(i32, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))), + .sub_access_size => op.imm - @as(i32, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))), + .add_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))), + .sub_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))), + .add_bit_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).bitSize(s.cg.pt.zcu))), + .sub_bit_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).bitSize(s.cg.pt.zcu))), + .add_limbs => op.imm + @as(i32, @intCast(@divExact( + op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu), + @divExact(op.base.size.bitSize(s.cg.target), 8), + ))), + .sub_limbs => op.imm + @as(i32, @intCast(@divExact( + op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu), + @divExact(op.base.size.bitSize(s.cg.target), 8), + ))), + .add_len => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))), + .sub_len => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))), + .add_elem_size => op.imm + @as(i32, @intCast( + op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), + )), + .sub_elem_size => op.imm - @as(i32, @intCast( + op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), + )), + .add_elem_limbs => op.imm + @as(i32, @intCast(@divExact( + op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), + @divExact(op.base.size.bitSize(s.cg.target), 8), + ))), + .sub_elem_limbs => op.imm - @as(i32, @intCast(@divExact( + op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), + @divExact(op.base.size.bitSize(s.cg.target), 8), + ))), + }; + } + + fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand { + return switch (op.tag) { + .none => .none, + .backward_label => .{ .inst = s.labels[@intFromEnum(op.base.ref)].backward.? 
}, + .forward_label => for (&s.labels[@intFromEnum(op.base.ref)].forward) |*label| { + if (label.*) |_| continue; + label.* = @intCast(s.cg.mir_instructions.len); + break .{ .inst = undefined }; + } else unreachable, + .ref => switch (op.base.ref.deref(s).tracking(s.cg).short) { + .immediate => |imm| .{ .imm = switch (op.base.size) { + .byte => if (std.math.cast(i8, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u8, @intCast(imm))), + .word => if (std.math.cast(i16, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u16, @intCast(imm))), + .dword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(@as(u32, @intCast(imm))), + .qword => if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |simm| .s(simm) else .u(imm), + else => unreachable, + } }, + else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = op.base.size }) }, + .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))) }, + }, + inline .limb, .mask_limb => |kind| .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, @field(s, @tagName(kind))) }, + .simm => .{ .imm = .s(op.adjustedImm(s)) }, + .lea => .{ .mem = .{ + .base = .{ .reg = registerAlias(op.base.ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)) }, + .mod = .{ .rm = .{ + .size = op.base.size, + .index = switch (op.index.ref) { + else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)), + .none => .none, + }, + .scale = op.index.scale, + .disp = op.adjustedImm(s), + } }, } }, - else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = size }) }, - .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(size.bitSize(), 8))) }, + .mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{ + .size = op.base.size, + .index = switch (op.index.ref) { + else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)), + .none => .none, + }, + .scale = op.index.scale, + .disp = op.adjustedImm(s), + }) }, }; } }; }; -fn select2( +fn select( cg: *CodeGen, dst_temps: []Temp, dst_tys: []const Type, src_temps: []Temp, - cases: []const Select2.Case, + cases: []const Select.Case, ) !void { - cases: for (cases) |*case| { + cases: for (cases) |case| { for (case.required_features) |required_feature| if (required_feature) |feature| if (!switch (feature) { - .@"64bit" => cg.target.cpu.arch == .x86_64, + .@"64bit" => cg.target.ptrBitWidth() == 64, .mmx => false, else => cg.hasFeature(feature), }) continue :cases; - for (case.constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp, cg)) continue :cases; - patterns: for (case.patterns) |*pattern| { + for (case.dst_constraints[0..dst_temps.len], dst_tys) |dst_constraint, dst_ty| if (!dst_constraint.accepts(dst_ty, cg)) continue :cases; + for (case.src_constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp.typeOf(cg), cg)) continue :cases; + patterns: for (case.patterns) |pattern| { for (pattern.src, src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns; - var s: Select2 = .{ + var s: Select = .{ .cg = cg, - .case = case, - .pattern = pattern, - .extra_temps = undefined, - .dst_temps = dst_temps, - .src_temps = src_temps, - .commute = pattern.commute, + .temps = undefined, + .labels = @splat(.{ .forward = @splat(null), .backward = null }), .limb = undefined, + 
.mask_limb = undefined, }; - for (&s.extra_temps, case.extra_temps) |*temp, spec| temp.* = try spec.create(s) orelse continue; + const tmp_slots = s.temps[@intFromEnum(Select.Operand.Ref.tmp0)..@intFromEnum(Select.Operand.Ref.dst0)]; + const dst_slots = s.temps[@intFromEnum(Select.Operand.Ref.dst0)..@intFromEnum(Select.Operand.Ref.src0)]; + const src_slots = s.temps[@intFromEnum(Select.Operand.Ref.src0)..@intFromEnum(Select.Operand.Ref.none)]; + + for (tmp_slots, case.extra_temps) |*slot, spec| slot.* = try spec.create(&s) orelse continue; while (true) for (pattern.src, src_temps) |src_pattern, *src_temp| { if (try src_pattern.convert(src_temp, cg)) break; } else break; + @memcpy(src_slots[0..src_temps.len], src_temps); + std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]); if (case.clobbers.eflags or case.each != .once) try cg.spillEflagsIfOccupied(); for (dst_temps, dst_tys, case.dst_temps[0..dst_temps.len]) |*dst_temp, dst_ty, dst_kind| - dst_temp.* = (try Select2.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, s)).?; + dst_temp.* = (try Select.TempSpec.create(.{ .type = dst_ty, .kind = dst_kind }, &s)).?; + @memcpy(dst_slots[0..dst_temps.len], dst_temps); switch (case.each) { .once => |body| for (body) |inst| try s.emit(inst), .limb => |limb| { - const limb_size, const limb_of_temp = limb.of.unwrap(s); - const limb_of_size: u31 = @intCast(limb_of_temp.typeOf(cg).abiSize(cg.pt.zcu)); + const limb_of_size: i32 = @intCast(limb.of.ref.deref(&s).typeOf(cg).abiSize(cg.pt.zcu)); s.limb = .{ - .size = limb_size, - .index = s.extra_temps[0].tracking(cg).short.register.to64(), + .size = limb.of.size, + .index = (try Select.Operand.tmp0p.lower(&s)).reg, .disp = limb_of_size, }; for (limb.header) |inst| try s.emit(inst); - try cg.asmRegisterImmediate(.{ ._, .mov }, s.limb.index, .s(-@as(i32, limb_of_size))); - const limb_loop_reloc: u32 = @intCast(cg.mir_instructions.len); + try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ }); + assert(s.labels[0].backward == null); + s.labels[0].backward = @intCast(cg.mir_instructions.len); for (limb.body) |inst| try s.emit(inst); - try cg.asmRegisterImmediate( - .{ ._, .add }, - s.limb.index, - .s(@intCast(@divExact(limb_size.bitSize(), 8))), - ); - _ = try cg.asmJccReloc(.nc, limb_loop_reloc); + try s.emit(.{ ._, ._, .add, .tmp0p, .i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ }); + try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ }); for (limb.trailer) |inst| try s.emit(inst); }, + .limb_and_mask_limb => |limb| { + const limb_of_size: i32 = @intCast(limb.of.ref.deref(&s).typeOf(cg).abiSize(cg.pt.zcu)); + s.limb = .{ + .size = limb.of.size, + .index = (try Select.Operand.tmp0p.lower(&s)).reg, + .disp = limb_of_size, + }; + const mask_limb_bit_size: u31 = @intCast(@divExact( + limb.of.size.bitSize(cg.target), + limb.of_mask.size.bitSize(cg.target), + )); + if (mask_limb_bit_size >= 8) { + s.mask_limb = .{ + .size = .fromBitSize(mask_limb_bit_size), + .index = (try Select.Operand.tmp1p.lower(&s)).reg, + }; + for (limb.header) |inst| try s.emit(inst); + try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ }); + try s.emit(.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }); + assert(s.labels[0].backward == null); + s.labels[0].backward = @intCast(cg.mir_instructions.len); + for (limb.body) |inst| try s.emit(inst); + try s.emit(.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, @divExact(mask_limb_bit_size, 8)), ._, ._ }); + try s.emit(.{ ._, ._, .add, .tmp0p, 
.i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ }); + try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ }); + for (limb.trailer) |inst| try s.emit(inst); + } else { + for (limb.header) |inst| try s.emit(inst); + try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ }); + try s.emit(.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }); + try s.emit(.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }); + assert(s.labels[0].backward == null); + s.labels[0].backward = @intCast(cg.mir_instructions.len); + for (limb.body) |inst| try s.emit(inst); + try s.emit(.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, mask_limb_bit_size), ._, ._ }); + try s.emit(.{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }); + try s.emit(.{ ._, ._nz, .j, .@"1f", ._, ._, ._ }); + try s.emit(.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }); + try s.emit(.{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }); + try s.emit(.{ ._, ._, .mov, .memid(.{ .ref = limb.of_mask.ref, .size = .byte }, .tmp3, -1), .tmp2b, ._, ._ }); + try s.emit(.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }); + try s.emit(.{ .@"1:", ._, .add, .tmp0p, .i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ }); + try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ }); + try s.emit(.{ ._, ._, .lea, .tmp3d, .lead(.none, .tmp1, -1), ._, ._ }); + try s.emit(.{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }); + try s.emit(.{ ._, ._, .mov, .memi(.{ .ref = limb.of_mask.ref, .size = .byte }, .tmp3), .tmp2b, ._, ._ }); + for (limb.trailer) |inst| try s.emit(inst); + } + }, } + s.emitLabel(.@"0:"); - for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, dst_kind| dst_kind.finish(dst_temp, s); - for (case.extra_temps, s.extra_temps) |spec, temp| if (spec.kind != .unused) try temp.die(cg); + for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, dst_kind| dst_kind.finish(dst_temp, &s); + for (case.extra_temps, tmp_slots) |spec, temp| if (spec.kind != .unused) try temp.die(cg); return; } } - return error.Select2Failed; + return error.SelectFailed; } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 4be9acf334..251cf7d7cd 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -30,9 +30,10 @@ pub fn findByMnemonic( prefix: Instruction.Prefix, mnemonic: Mnemonic, ops: []const Instruction.Operand, + target: *const std.Target, ) !?Encoding { - var input_ops = [1]Op{.none} ** 4; - for (input_ops[0..ops.len], ops) |*input_op, op| input_op.* = Op.fromOperand(op); + var input_ops: [4]Op = @splat(.none); + for (input_ops[0..ops.len], ops) |*input_op, op| input_op.* = Op.fromOperand(op, target); const rex_required = for (ops) |op| switch (op) { .reg => |r| switch (r) { @@ -57,6 +58,16 @@ pub fn findByMnemonic( var shortest_enc: ?Encoding = null; var shortest_len: ?usize = null; next: for (mnemonic_to_encodings_map[@intFromEnum(mnemonic)]) |data| { + if (!switch (data.feature) { + .none => true, + inline else => |tag| has_features: { + comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' '); + comptime var features: []const std.Target.x86.Feature = &.{}; + inline while (comptime feature_it.next()) |feature| features = features ++ .{@field(std.Target.x86.Feature, feature)}; + break :has_features std.Target.x86.featureSetHasAll(target.cpu.features, features[0..features.len].*); + }, + }) continue; + switch (data.mode) { .none, .short => if (rex_required) continue, .rex, .rex_short => if (!rex_required) continue, @@ -64,7 +75,7 @@ pub fn findByMnemonic( } for (input_ops, data.ops) |input_op, data_op| if 
(!input_op.isSubset(data_op)) continue :next; - const enc = Encoding{ .mnemonic = mnemonic, .data = data }; + const enc: Encoding = .{ .mnemonic = mnemonic, .data = data }; if (shortest_enc) |previous_shortest_enc| { const len = estimateInstructionLength(prefix, enc, ops); const previous_shortest_len = shortest_len orelse @@ -474,7 +485,7 @@ pub const Op = enum { ymm, ymm_m256, // zig fmt: on - pub fn fromOperand(operand: Instruction.Operand) Op { + pub fn fromOperand(operand: Instruction.Operand, target: *const std.Target) Op { return switch (operand) { .none => .none, @@ -516,7 +527,7 @@ pub const Op = enum { .mem => |mem| switch (mem) { .moffs => .moffs, - .sib, .rip => switch (mem.bitSize()) { + .sib, .rip => switch (mem.bitSize(target)) { 0 => .m, 8 => .m8, 16 => .m16, @@ -835,7 +846,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op var inst = Instruction{ .prefix = prefix, .encoding = encoding, - .ops = [1]Operand{.none} ** 4, + .ops = @splat(.none), }; @memcpy(inst.ops[0..ops.len], ops); @@ -850,7 +861,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op const mnemonic_to_encodings_map = init: { @setEvalBranchQuota(5_000); const mnemonic_count = @typeInfo(Mnemonic).@"enum".fields.len; - var mnemonic_map: [mnemonic_count][]Data = .{&.{}} ** mnemonic_count; + var mnemonic_map: [mnemonic_count][]Data = @splat(&.{}); const encodings = @import("encodings.zig"); for (encodings.table) |entry| mnemonic_map[@intFromEnum(entry[0])].len += 1; var data_storage: [encodings.table.len]Data = undefined; @@ -859,7 +870,7 @@ const mnemonic_to_encodings_map = init: { value.ptr = data_storage[storage_i..].ptr; storage_i += value.len; } - var mnemonic_i: [mnemonic_count]usize = .{0} ** mnemonic_count; + var mnemonic_i: [mnemonic_count]usize = @splat(0); const ops_len = @typeInfo(std.meta.FieldType(Data, .ops)).array.len; const opc_len = @typeInfo(std.meta.FieldType(Data, .opc)).array.len; for (encodings.table) |entry| { @@ -876,7 +887,7 @@ const mnemonic_to_encodings_map = init: { i.* += 1; } const final_storage = data_storage; - var final_map: [mnemonic_count][]const Data = .{&.{}} ** mnemonic_count; + var final_map: [mnemonic_count][]const Data = @splat(&.{}); storage_i = 0; for (&final_map, mnemonic_map) |*final_value, value| { final_value.* = final_storage[storage_i..][0..value.len]; diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 015b3ba12e..e025f4ddbd 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -1,6 +1,7 @@ //! 
This file contains the functionality for lowering x86_64 MIR to Instructions bin_file: *link.File, +target: *const std.Target, output_mode: std.builtin.OutputMode, link_mode: std.builtin.LinkMode, pic: bool, @@ -193,7 +194,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_probe_align_ri_s => { try lower.emit(.none, .@"test", &.{ .{ .reg = inst.data.ri.r1 }, - .{ .imm = Immediate.s(@bitCast(inst.data.ri.i)) }, + .{ .imm = .s(@bitCast(inst.data.ri.i)) }, }); try lower.emit(.none, .jz, &.{ .{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) }, @@ -229,14 +230,14 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { } try lower.emit(.none, .sub, &.{ .{ .reg = inst.data.ri.r1 }, - .{ .imm = Immediate.s(@bitCast(inst.data.ri.i)) }, + .{ .imm = .s(@bitCast(inst.data.ri.i)) }, }); assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts); }, .pseudo_probe_adjust_setup_rri_s => { try lower.emit(.none, .mov, &.{ .{ .reg = inst.data.rri.r2.to32() }, - .{ .imm = Immediate.s(@bitCast(inst.data.rri.i)) }, + .{ .imm = .s(@bitCast(inst.data.rri.i)) }, }); try lower.emit(.none, .sub, &.{ .{ .reg = inst.data.rri.r1 }, @@ -255,7 +256,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); try lower.emit(.none, .sub, &.{ .{ .reg = inst.data.rr.r2 }, - .{ .imm = Immediate.s(page_size) }, + .{ .imm = .s(page_size) }, }); try lower.emit(.none, .jae, &.{ .{ .imm = lower.reloc(.{ .inst = index }, 0) }, @@ -355,7 +356,7 @@ pub fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .mi_s, .rmi_s, .pseudo_dbg_local_ai_s, - => Immediate.s(@bitCast(i)), + => .s(@bitCast(i)), .rrri, .rri_u, @@ -368,11 +369,11 @@ pub fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .rrm, .rrmi, .pseudo_dbg_local_ai_u, - => Immediate.u(i), + => .u(i), .ri_64, .pseudo_dbg_local_ai_64, - => Immediate.u(lower.mir.extraData(Mir.Imm64, i).data.decode()), + => .u(lower.mir.extraData(Mir.Imm64, i).data.decode()), else => unreachable, }; @@ -389,7 +390,7 @@ fn reloc(lower: *Lower, target: Reloc.Target, off: i32) Immediate { .off = off, }; lower.result_relocs_len += 1; - return Immediate.s(0); + return .s(0); } fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) Error!void { @@ -421,15 +422,15 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) try Instruction.new(.none, .lea, &[_]Operand{ .{ .reg = .rdi }, .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }, - }); + }, lower.target); lower.result_insts_len += 1; _ = lower.reloc(.{ .linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null), }, 0); lower.result_insts[lower.result_insts_len] = try Instruction.new(.none, .call, &[_]Operand{ - .{ .imm = Immediate.s(0) }, - }); + .{ .imm = .s(0) }, + }, lower.target); lower.result_insts_len += 1; _ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0); emit_mnemonic = .lea; @@ -443,7 +444,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) try Instruction.new(.none, .mov, &[_]Operand{ .{ .reg = .rax }, .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) }, - }); + }, lower.target); lower.result_insts_len += 1; _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); emit_mnemonic = .lea; @@ -467,7 +468,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) try Instruction.new(.none, .mov, &[_]Operand{ .{ .reg = reg.to64() }, .{ .mem = Memory.initRip(.qword, 0) }, - }); + }, lower.target); lower.result_insts_len += 
1; break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ .base = .{ .reg = reg.to64(), @@ -482,7 +483,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) }) }, .lea => { emit_mnemonic = .mov; - break :op .{ .imm = Immediate.s(0) }; + break :op .{ .imm = .s(0) }; }, .mov => break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ .base = .{ .reg = .ds }, @@ -541,7 +542,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) }; } lower.result_insts[lower.result_insts_len] = - try Instruction.new(emit_prefix, emit_mnemonic, emit_ops); + try Instruction.new(emit_prefix, emit_mnemonic, emit_ops, lower.target); lower.result_insts_len += 1; } @@ -743,7 +744,7 @@ fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Er while (it.next()) |i| { try lower.emit(.directive, .@".cfi_rel_offset", &.{ .{ .reg = callee_preserved_regs[i] }, - .{ .imm = Immediate.s(off) }, + .{ .imm = .s(off) }, }); off += 8; } diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 45ad2c13e9..19bacdcac6 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -214,6 +214,10 @@ pub const Inst = struct { p_q, /// Packed ___ Double Quadword p_dq, + /// ___ Aligned Packed Integer Values + _dqa, + /// ___ Unaligned Packed Integer Values + _dqu, /// ___ Scalar Single-Precision Values _ss, @@ -234,6 +238,10 @@ pub const Inst = struct { v_d, /// VEX-Encoded ___ QuadWord v_q, + /// VEX-Encoded ___ Aligned Packed Integer Values + v_dqa, + /// VEX-Encoded ___ Unaligned Packed Integer Values + v_dqu, /// VEX-Encoded ___ Integer Data v_i128, /// VEX-Encoded Packed ___ @@ -362,6 +370,8 @@ pub const Inst = struct { /// Move scalar double-precision floating-point value /// Move doubleword /// Move quadword + /// Move aligned packed integer values + /// Move unaligned packed integer values mov, /// Move data after swapping bytes movbe, @@ -609,10 +619,6 @@ pub const Inst = struct { cvttps2dq, /// Convert with truncation scalar double-precision floating-point value to doubleword integer cvttsd2si, - /// Move aligned packed integer values - movdqa, - /// Move unaligned packed integer values - movdqu, /// Packed interleave shuffle of quadruplets of single-precision floating-point values /// Packed interleave shuffle of pairs of double-precision floating-point values /// Shuffle packed doublewords diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 032a57a3d0..6d1ab76c5a 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -479,8 +479,8 @@ pub const RegisterOffset = struct { reg: Register, off: i32 = 0 }; pub const SymbolOffset = struct { sym_index: u32, off: i32 = 0 }; pub const Memory = struct { - base: Base, - mod: Mod, + base: Base = .none, + mod: Mod = .{ .rm = .{} }, pub const Base = union(enum(u2)) { none, @@ -503,7 +503,7 @@ pub const Memory = struct { off: u64, pub const Rm = struct { - size: Size, + size: Size = .none, index: Register = .none, scale: Scale = .@"1", disp: i32 = 0, @@ -512,6 +512,7 @@ pub const Memory = struct { pub const Size = enum(u4) { none, + ptr, byte, word, dword, @@ -548,9 +549,10 @@ pub const Memory = struct { }; } - pub fn bitSize(s: Size) u64 { + pub fn bitSize(s: Size, target: *const std.Target) u64 { return switch (s) { .none => 0, + .ptr => target.ptrBitWidth(), .byte => 8, .word => 16, .dword => 32, @@ -569,8 +571,11 @@ pub const Memory = struct { writer: anytype, ) @TypeOf(writer).Error!void { if (s == .none) return; - try 
writer.writeAll(@tagName(s)); - try writer.writeAll(" ptr"); + if (s != .ptr) { + try writer.writeAll(@tagName(s)); + try writer.writeByte(' '); + } + try writer.writeAll("ptr"); } }; diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 81467de515..048fb6508d 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -167,11 +167,11 @@ pub const Instruction = struct { }; } - pub fn bitSize(mem: Memory) u64 { + pub fn bitSize(mem: Memory, target: *const std.Target) u64 { return switch (mem) { - .rip => |r| r.ptr_size.bitSize(), - .sib => |s| s.ptr_size.bitSize(), - .moffs => 64, + .rip => |r| r.ptr_size.bitSize(target), + .sib => |s| s.ptr_size.bitSize(target), + .moffs => target.ptrBitWidth(), }; } }; @@ -314,16 +314,21 @@ pub const Instruction = struct { } }; - pub fn new(prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) !Instruction { + pub fn new( + prefix: Prefix, + mnemonic: Mnemonic, + ops: []const Operand, + target: *const std.Target, + ) !Instruction { const encoding: Encoding = switch (prefix) { - else => (try Encoding.findByMnemonic(prefix, mnemonic, ops)) orelse { + else => (try Encoding.findByMnemonic(prefix, mnemonic, ops, target)) orelse { log.err("no encoding found for: {s} {s} {s} {s} {s} {s}", .{ @tagName(prefix), @tagName(mnemonic), - @tagName(if (ops.len > 0) Encoding.Op.fromOperand(ops[0]) else .none), - @tagName(if (ops.len > 1) Encoding.Op.fromOperand(ops[1]) else .none), - @tagName(if (ops.len > 2) Encoding.Op.fromOperand(ops[2]) else .none), - @tagName(if (ops.len > 3) Encoding.Op.fromOperand(ops[3]) else .none), + @tagName(if (ops.len > 0) Encoding.Op.fromOperand(ops[0], target) else .none), + @tagName(if (ops.len > 1) Encoding.Op.fromOperand(ops[1], target) else .none), + @tagName(if (ops.len > 2) Encoding.Op.fromOperand(ops[2], target) else .none), + @tagName(if (ops.len > 3) Encoding.Op.fromOperand(ops[3], target) else .none), }); return error.InvalidInstruction; }, @@ -332,10 +337,10 @@ pub const Instruction = struct { .data = .{ .op_en = .zo, .ops = .{ - if (ops.len > 0) Encoding.Op.fromOperand(ops[0]) else .none, - if (ops.len > 1) Encoding.Op.fromOperand(ops[1]) else .none, - if (ops.len > 2) Encoding.Op.fromOperand(ops[2]) else .none, - if (ops.len > 3) Encoding.Op.fromOperand(ops[3]) else .none, + if (ops.len > 0) Encoding.Op.fromOperand(ops[0], target) else .none, + if (ops.len > 1) Encoding.Op.fromOperand(ops[1], target) else .none, + if (ops.len > 2) Encoding.Op.fromOperand(ops[2], target) else .none, + if (ops.len > 3) Encoding.Op.fromOperand(ops[3], target) else .none, }, .opc_len = 0, .opc = undefined, diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index f0eb0dce3f..68cb154d3b 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -976,6 +976,7 @@ const x86_64 = struct { it: *RelocsIterator, ) !void { dev.check(.x86_64_backend); + const t = &elf_file.base.comp.root_mod.resolved_target.result; const is_static = elf_file.base.isStatic(); const is_dyn_lib = elf_file.isEffectivelyDynLib(); @@ -1046,7 +1047,7 @@ const x86_64 = struct { .GOTTPOFF => { const should_relax = blk: { if (is_dyn_lib or symbol.flags.import) break :blk false; - if (!x86_64.canRelaxGotTpOff(code.?[r_offset - 3 ..])) break :blk false; + if (!x86_64.canRelaxGotTpOff(code.?[r_offset - 3 ..], t)) break :blk false; break :blk true; }; if (!should_relax) { @@ -1090,6 +1091,7 @@ const x86_64 = struct { stream: anytype, ) (error{ InvalidInstruction, CannotEncode } || RelocError)!void { dev.check(.x86_64_backend); + 
const t = &elf_file.base.comp.root_mod.resolved_target.result; const diags = &elf_file.base.comp.link_diags; const r_type: elf.R_X86_64 = @enumFromInt(rel.r_type()); const r_offset = std.math.cast(usize, rel.r_offset) orelse return error.Overflow; @@ -1120,7 +1122,7 @@ const x86_64 = struct { .GOTPCRELX => { if (!target.flags.import and !target.isIFunc(elf_file) and !target.isAbs(elf_file)) blk: { - x86_64.relaxGotpcrelx(code[r_offset - 2 ..]) catch break :blk; + x86_64.relaxGotpcrelx(code[r_offset - 2 ..], t) catch break :blk; try cwriter.writeInt(i32, @as(i32, @intCast(S + A - P)), .little); return; } @@ -1129,7 +1131,7 @@ const x86_64 = struct { .REX_GOTPCRELX => { if (!target.flags.import and !target.isIFunc(elf_file) and !target.isAbs(elf_file)) blk: { - x86_64.relaxRexGotpcrelx(code[r_offset - 3 ..]) catch break :blk; + x86_64.relaxRexGotpcrelx(code[r_offset - 3 ..], t) catch break :blk; try cwriter.writeInt(i32, @as(i32, @intCast(S + A - P)), .little); return; } @@ -1184,7 +1186,7 @@ const x86_64 = struct { const S_ = target.tlsDescAddress(elf_file); try cwriter.writeInt(i32, @as(i32, @intCast(S_ + A - P)), .little); } else { - x86_64.relaxGotPcTlsDesc(code[r_offset - 3 ..]) catch { + x86_64.relaxGotPcTlsDesc(code[r_offset - 3 ..], t) catch { var err = try diags.addErrorWithNotes(1); try err.addMsg("could not relax {s}", .{@tagName(r_type)}); err.addNote("in {}:{s} at offset 0x{x}", .{ @@ -1208,7 +1210,7 @@ const x86_64 = struct { const S_ = target.gotTpAddress(elf_file); try cwriter.writeInt(i32, @as(i32, @intCast(S_ + A - P)), .little); } else { - x86_64.relaxGotTpOff(code[r_offset - 3 ..]); + x86_64.relaxGotTpOff(code[r_offset - 3 ..], t); try cwriter.writeInt(i32, @as(i32, @intCast(S - TP)), .little); } }, @@ -1269,31 +1271,31 @@ const x86_64 = struct { } } - fn relaxGotpcrelx(code: []u8) !void { + fn relaxGotpcrelx(code: []u8, t: *const std.Target) !void { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return error.RelaxFailure; const inst = switch (old_inst.encoding.mnemonic) { .call => try Instruction.new(old_inst.prefix, .call, &.{ // TODO: hack to force imm32s in the assembler .{ .imm = Immediate.s(-129) }, - }), + }, t), .jmp => try Instruction.new(old_inst.prefix, .jmp, &.{ // TODO: hack to force imm32s in the assembler .{ .imm = Immediate.s(-129) }, - }), + }, t), else => return error.RelaxFailure, }; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); - const nop = try Instruction.new(.none, .nop, &.{}); + const nop = try Instruction.new(.none, .nop, &.{}, t); try encode(&.{ nop, inst }, code); } - fn relaxRexGotpcrelx(code: []u8) !void { + fn relaxRexGotpcrelx(code: []u8, t: *const std.Target) !void { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return error.RelaxFailure; switch (old_inst.encoding.mnemonic) { .mov => { - const inst = try Instruction.new(old_inst.prefix, .lea, &old_inst.ops); + const inst = try Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t); relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); try encode(&.{inst}, code); }, @@ -1398,7 +1400,7 @@ const x86_64 = struct { } } - fn canRelaxGotTpOff(code: []const u8) bool { + fn canRelaxGotTpOff(code: []const u8, t: *const std.Target) bool { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return false; switch (old_inst.encoding.mnemonic) { @@ -1406,7 +1408,7 @@ const x86_64 = struct { old_inst.ops[0], // TODO: hack to force imm32s in the assembler .{ .imm = Immediate.s(-129) }, 
- })) |inst| { + }, t)) |inst| { inst.encode(std.io.null_writer, .{}) catch return false; return true; } else |_| return false, @@ -1414,7 +1416,7 @@ const x86_64 = struct { } } - fn relaxGotTpOff(code: []u8) void { + fn relaxGotTpOff(code: []u8, t: *const std.Target) void { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse unreachable; switch (old_inst.encoding.mnemonic) { @@ -1423,7 +1425,7 @@ const x86_64 = struct { old_inst.ops[0], // TODO: hack to force imm32s in the assembler .{ .imm = Immediate.s(-129) }, - }) catch unreachable; + }, t) catch unreachable; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); encode(&.{inst}, code) catch unreachable; }, @@ -1431,7 +1433,7 @@ const x86_64 = struct { } } - fn relaxGotPcTlsDesc(code: []u8) !void { + fn relaxGotPcTlsDesc(code: []u8, target: *const std.Target) !void { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return error.RelaxFailure; switch (old_inst.encoding.mnemonic) { @@ -1440,7 +1442,7 @@ const x86_64 = struct { old_inst.ops[0], // TODO: hack to force imm32s in the assembler .{ .imm = Immediate.s(-129) }, - }); + }, target); relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); try encode(&.{inst}, code); }, diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index 031f1d9e1b..7860c945a0 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -264,11 +264,48 @@ fn testBinary(comptime op: anytype) !void { 0xed533d18f8657f3f, 0x1ddd7cd7f6bab957, }); - if (false) try testType(@Vector(1, u128), .{ + try testType(@Vector(1, u128), .{ 0x5f11e16b0ca3392f907a857881455d2e, }, .{ 0xf9142d73b408fd6955922f9fc147f7d7, }); + try testType(@Vector(2, u128), .{ + 0xee0fb41fabd805923fb21b5c658e3a87, + 0x2352e74aad6c58b3255ff0bba5aa6552, + }, .{ + 0x8d822f9fdd9cb9a5b43513b14419b224, + 0x1aef2a02704379e38ead4d53d69e4cc4, + }); + try testType(@Vector(4, u128), .{ + 0xc74437a4ea3bbbb193dbf0ea2f0c5281, + 0x039e4b1640868248780db1834a0027eb, + 0xb9e8bb34155b2b238da20331d08ff85b, + 0x863802d34a54c2e6aa71dd0f067c4904, + }, .{ + 0x7471bae24ff7b84ab107f86ba2b7d1e7, + 0x8f34c449d0576e682c20bda74aa6b6c9, + 0x1f34c3efa167b61c48c9d5ec01a1a93f, + 0x71c8318fcf3ddc7be058c73a52dce9e3, + }); + try testType(@Vector(8, u128), .{ + 0xbf2db71463037f55ee338431f902a906, + 0xb7ad317626655f38ab25ae30d8a1aa67, + 0x7d3c5a3ffaa607b5560d69ae3fcf7863, + 0x009a39a8badf8b628c686dc176aa1273, + 0x49dba3744c91304cc7bbbdab61b6c969, + 0x6ec664b624f7acf79ce69d80ed7bc85c, + 0xe02d7a303c0f00c39010f3b815547f1c, + 0xb13e1ee914616f58cffe6acd33d9b5c8, + }, .{ + 0x2f2d355a071942a7384f82ba72a945b8, + 0x61f151b3afec8cb7664f813cecf581d1, + 0x5bfbf5484f3a07f0eacc4739ff48af80, + 0x59c0abbf8d829cf525a87d5c9c41a38a, + 0xdad8b18eb680f0520ca49ebfb5842e22, + 0xa05adcaedd9057480b3ba0413d003cec, + 0x8b0b4a27fc94a0e90652d19bc755b63d, + 0xa858bce5ad0e48c13588a4e170e8667c, + }); } inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) { From a7efc56d8680bb51cc2488bbc0680b2fc080174f Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 26 Dec 2024 22:05:38 -0500 Subject: [PATCH 08/25] x86_64: the previous loop abstraction was too confusing The temp usage was non-obvious, and implicit instructions hard to reason about. 
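The replacement case bodies spell the loop out in the table itself: an index
register is initialized to the negative byte size of the operand (the
`.a(.src0, .sub_size)` operand), each limb is addressed as operand plus size
plus index (the `.memia(..., .tmp0, .add_size)` operands), and the trailing
`add`/`jnc` pair repeats the body until the index carries back up to zero.
A rough standalone sketch of that indexing idiom in plain Zig follows (not
the Select DSL; `xorChunks` and everything in it is illustrative only):

    const std = @import("std");

    /// Mirrors the shape of the emitted loop:
    ///     mov  index, -(total byte size)
    ///   0:
    ///     ...  operate on base[total + index] ...
    ///     add  index, limb size
    ///     jnc  0b            ; no carry while the index is still negative
    fn xorChunks(dst: []u8, src: []const u8) void {
        std.debug.assert(dst.len == src.len);
        const chunk = 8; // stand-in for the limb size (8/16/32 bytes in the backend)
        std.debug.assert(dst.len % chunk == 0);
        var index: isize = -@as(isize, @intCast(dst.len));
        while (index != 0) : (index += chunk) {
            const offset: usize = @intCast(@as(isize, @intCast(dst.len)) + index);
            for (dst[offset..][0..chunk], src[offset..][0..chunk]) |*d, s| d.* ^= s;
        }
    }

    test xorChunks {
        var dst: [16]u8 = @splat(0xaa);
        const src: [16]u8 = @splat(0x0f);
        xorChunks(&dst, &src);
        try std.testing.expectEqual(@as(u8, 0xa5), dst[0]);
    }

The emitted form is a do-while: the `jnc` back-edge keeps looping while the
`add` has not carried, which happens exactly when the negative index wraps
back to zero.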
--- src/arch/x86_64/CodeGen.zig | 1529 +++++++++++++++++++---------------- 1 file changed, 822 insertions(+), 707 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 4d7c51ffd0..6f3ec618fe 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2669,7 +2669,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, @@ -2677,13 +2677,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb = .{ - .of = .src0y, - .body = &.{ - .{ ._, .v_dqu, .mov, .tmp1y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .limb(.src1y), ._ }, - .{ ._, .v_dqu, .mov, .limb(.dst0y), .tmp1y, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_features = .{ .avx, null }, @@ -2691,7 +2691,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, @@ -2699,13 +2699,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb = .{ - .of = .src0y, - .body = &.{ - .{ ._, .v_pd, .movu, .tmp1y, .limb(.src0y), ._, ._ }, - .{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .limb(.src1y), ._ }, - .{ ._, .v_pd, .movu, .limb(.dst0y), .tmp1y, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_pd, .movu, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_features = .{ .avx, null }, @@ -2713,7 +2713,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, @@ -2721,13 +2721,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb = .{ - .of = .src0x, - .body = &.{ - .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .limb(.src1x), ._ }, - .{ ._, .v_dqu, .mov, .limb(.dst0x), .tmp1x, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .v_dqu, .mov, .memia(.dst0x, .tmp0, 
.add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_features = .{ .sse2, null }, @@ -2735,7 +2735,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, @@ -2743,13 +2743,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb = .{ - .of = .src0x, - .body = &.{ - .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, - .{ ._, .p_, mir_tag, .tmp1x, .limb(.src1x), ._, ._ }, - .{ ._, ._dqu, .mov, .limb(.dst0x), .tmp1x, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_features = .{ .sse, null }, @@ -2757,7 +2757,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, @@ -2765,13 +2765,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb = .{ - .of = .src0x, - .body = &.{ - .{ ._, ._ps, .movu, .tmp1x, .limb(.src0x), ._, ._ }, - .{ ._, ._ps, mir_tag, .tmp1x, .limb(.src1x), ._, ._ }, - .{ ._, ._ps, .movu, .limb(.dst0x), .tmp1x, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .movu, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .movu, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_features = .{ .mmx, null }, @@ -2779,7 +2779,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .mmx } }, .unused, .unused, @@ -2787,20 +2787,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb = .{ - .of = .src0q, - .body = &.{ - .{ ._, ._q, .mov, .tmp1q, .limb(.src0q), ._, ._ }, - .{ ._, .p_, mir_tag, .tmp1q, .limb(.src1q), ._, ._ }, - .{ ._, ._q, .mov, .limb(.dst0q), .tmp1q, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._q, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, mir_tag, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._q, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = 
.{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .unused, .unused, @@ -2808,13 +2808,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb = .{ - .of = .src0p, - .body = &.{ - .{ ._, ._, .mov, .tmp1p, .limb(.src0p), ._, ._ }, - .{ ._, ._, mir_tag, .tmp1p, .limb(.src1p), ._, ._ }, - .{ ._, ._, .mov, .limb(.dst0p), .tmp1p, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, mir_tag, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, } }, }) catch |err2| switch (err2) { @@ -3306,7 +3306,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .clobbers = .{ .eflags = true }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .unused, .unused, @@ -3341,32 +3341,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0y, - .of_mask = .dst0b, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0d), .tmp2d, ._, ._ }, - }, - .ne => &.{ - .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, - .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0d), .tmp2d, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, 
.lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ @@ -3376,34 +3382,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0y, - .of_mask = .dst0w, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, - .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, - }, - .ne => &.{ - .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, - .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, - .{ ._, ._, .not, .tmp2w, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3y, .tmp3y, .tmp3y, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ @@ -3413,32 +3425,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0y, - .of_mask = .dst0d, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, - .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, - .{ ._, ._, .mov, 
.maskLimb(.dst0b), .tmp2b, ._, ._ }, - }, - .ne => &.{ - .{ ._, .v_dqu, .mov, .tmp3y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .limb(.src1y), ._ }, - .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, - .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0b), .tmp2b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, + .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ @@ -3448,34 +3466,64 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .reg = .rcx } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .reg = .rcx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0y, - .of_mask = .dst0q, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, .v_dqu, .mov, .tmp4y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .limb(.src1y), ._ }, - .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, - .ne => &.{ - .{ ._, .v_dqu, .mov, .tmp4y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .limb(.src1y), ._ }, - .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, 
.@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ @@ -3485,32 +3533,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0x, - .of_mask = .dst0b, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, - }, - .ne => &.{ - .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, - .{ ._, ._, .not, .tmp2w, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, 
.mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ @@ -3520,34 +3574,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0x, - .of_mask = .dst0w, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ }, - .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, - }, - .ne => &.{ - .{ ._, .v_dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .limb(.src1x), ._ }, - .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ }, - .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, - .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .vp_b, .ackssw, .tmp3x, .tmp3x, .tmp3x, ._ }, + .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ @@ -3557,34 +3617,64 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .reg = .rcx } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = 
.general_purpose } }, + .{ .type = .u32, .kind = .{ .reg = .rcx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0x, - .of_mask = .dst0d, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ }, - .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, - .ne => &.{ - .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ }, - .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ @@ -3594,34 +3684,64 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type 
= .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .reg = .rcx } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .reg = .rcx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0x, - .of_mask = .dst0q, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ }, - .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, - .ne => &.{ - .{ ._, .v_dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .limb(.src1x), ._ }, - .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, 
.i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ @@ -3631,32 +3751,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0x, - .of_mask = .dst0b, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, - .{ ._, .p_b, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, - }, - .ne => &.{ - .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, - .{ ._, .p_b, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, - .{ ._, ._, .not, .tmp2w, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ @@ -3666,34 +3792,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0x, - .of_mask = .dst0w, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, - .{ ._, .p_w, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ }, - .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, - }, - .ne => &.{ - .{ ._, ._dqu, .mov, .tmp3x, .limb(.src0x), ._, ._ }, - .{ ._, .p_w, .cmpeq, .tmp3x, .limb(.src1x), ._, ._ }, - .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, 
._ }, - .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp3x, .tmp3x, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, + .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ @@ -3703,34 +3835,64 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .reg = .rcx } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .reg = .rcx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0x, - .of_mask = .dst0d, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, - .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ }, - .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, - .ne => &.{ - .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, - .{ ._, .p_d, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ }, - .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, 
-1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ @@ -3740,143 +3902,249 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .reg = .rcx } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .reg = .rcx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0x, - .of_mask = .dst0q, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, - .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ }, - .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, - .ne => &.{ - .{ ._, ._dqu, .mov, .tmp4x, .limb(.src0x), ._, ._ }, - .{ ._, .p_q, .cmpeq, .tmp4x, .limb(.src1x), ._, ._ }, - .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, 
._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, + .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .mmx, null }, + .required_features = .{ .sse, .mmx }, .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .mmx } }, - .unused, - .unused, - }, - .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0q, - .of_mask = .dst0b, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ }, - .{ ._, .p_b, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, - }, - .ne => &.{ - .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ }, - .{ ._, .p_b, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, - .{ ._, ._, .not, .tmp2w, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2w, ._, ._ }, - }, - }, - } }, - }, .{ - .required_features = .{ .mmx, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .mmx } }, .unused, .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = 
.src0q, - .of_mask = .dst0w, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ }, - .{ ._, .p_w, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ }, - .{ ._, .p_b, .ackssw, .tmp3q, .tmp3q, ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, - }, - .ne => &.{ - .{ ._, ._dqu, .mov, .tmp3q, .limb(.src0q), ._, ._ }, - .{ ._, .p_w, .cmpeq, .tmp3q, .limb(.src1q), ._, ._ }, - .{ ._, .p_b, .ackssw, .tmp3q, .tmp3q, ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, - .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, - .{ ._, ._, .mov, .maskLimb(.dst0w), .tmp2b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, + .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .mmx, null }, + .required_features = .{ .sse, .mmx }, + .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + }, + .dst_temps = .{.mem}, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, + .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, + .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, 
.mov, .tmp4d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, + .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, + .{ ._, ._, .xor, .tmp4b, .i(0b1111), ._, ._ }, + .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, + }, + } }, + }, .{ + .required_features = .{ .sse, .mmx }, .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .reg = .rcx } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .reg = .rcx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .mmx } }, - .unused, }, .dst_temps = .{.mem}, - .each = .{ .limb_and_mask_limb = .{ - .of = .src0q, - .of_mask = .dst0d, - .body = switch (cc) { - else => unreachable, - .e => &.{ - .{ ._, ._dqu, .mov, .tmp4q, .limb(.src0q), ._, ._ }, - .{ ._, .p_q, .cmpeq, .tmp4q, .limb(.src1q), ._, ._ }, - .{ ._, ._ps, .movmsk, .tmp3d, .tmp4q, ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, - .ne => &.{ - .{ ._, ._dqu, .mov, .tmp4q, .limb(.src0q), ._, ._ }, - .{ ._, .p_q, .cmpeq, .tmp4q, .limb(.src1q), ._, ._ }, - .{ ._, ._ps, .movmsk, .tmp3d, .tmp4q, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, - .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, - }, + .each = .{ .once = switch (cc) { + else => unreachable, + .e => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, + .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, + .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, + .{ ._, ._, .lea, 
.tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, + }, + .ne => &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, + .{ .@"0:", ._q, .mov, .tmp5q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_d, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ }, + .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, + .{ ._, ._, .xor, .tmp4b, .i(0b11), ._, ._ }, + .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, + .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, + .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, } }, }, .{ @@ -3887,7 +4155,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .unused, @@ -3915,7 +4183,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, .unused, @@ -3944,7 +4212,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, .unused, @@ -3972,7 +4240,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, .unused, @@ -4001,7 +4269,7 @@ fn 
genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .unused, @@ -4029,7 +4297,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .unused, @@ -4058,7 +4326,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .unused, @@ -4087,7 +4355,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .unused, @@ -4185,7 +4453,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, @@ -4214,7 +4482,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, @@ -4244,7 +4512,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, @@ -4273,7 +4541,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, @@ -4303,7 +4571,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = 
.general_purpose } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, @@ -4332,7 +4600,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, @@ -4362,7 +4630,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -4392,7 +4660,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -4493,7 +4761,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, @@ -4523,7 +4791,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, @@ -4553,7 +4821,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, @@ -4582,7 +4850,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, @@ -4612,7 +4880,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, @@ -4642,7 +4910,7 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, @@ -4672,7 +4940,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -4702,7 +4970,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, @@ -4803,7 +5071,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u32, .kind = .{ .reg = .ecx } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, @@ -4822,15 +5090,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, .{ ._, ._, .add, .tmp1d, .i(1), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, - .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_access_size), .tmp2p, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ .@"1:", ._, .add, .tmp0p, .i(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, @@ -4842,7 +5110,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u32, .kind = .{ .reg = .ecx } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, @@ -4861,15 +5129,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, .{ ._, ._, .inc, .tmp1d, ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, 
.mov, .tmp3d, .tmp1d, ._, ._ }, .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, - .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_access_size), .tmp2p, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ .@"1:", ._, .inc, .tmp0p, ._, ._, ._ }, .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .tmp2, .add_bit_size), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, @@ -5040,8 +5308,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .bool, .kind = .{ .rc = .general_purpose } }, .unused, .unused, .unused, @@ -5049,18 +5317,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .limb = .{ - .of = .src0p, - .header = &.{ - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - }, - .body = &.{ - .{ ._, ._, .mov, .tmp0p, .limb(.src0p), ._, ._ }, - .{ ._, ._r, .sh, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp0p, .limb(.src1p), ._, ._ }, - .{ ._, ._c, .set, .tmp1b, ._, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .dst0b, ._, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._r, .sh, .tmp1b, .i(1), ._, ._ }, + .{ ._, ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .sbb, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._c, .set, .tmp1b, ._, ._, ._ }, + .{ ._, .fromCondition(cc), .set, .dst0b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, } }, }) catch |err| switch (err) { @@ -5296,7 +5562,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, .unused, @@ -5304,19 +5570,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .src0y, - .header = &.{ - .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .tmp2y, ._ }, - }, - .body = &.{ - .{ ._, .v_dqu, .mov, .tmp1y, .limb(.src0y), ._, ._ }, - .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .limb(.src1y), ._ }, - .{ ._, .vp_, .@"or", .tmp2y, .tmp2y, .tmp1y, ._ }, - }, - .trailer = &.{ - .{ ._, .vp_, .@"test", .tmp2y, .tmp2y, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, } }, }, .{ .required_features = .{ .avx, null }, @@ -5324,7 +5586,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, .unused, @@ -5332,19 +5594,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .src0y, - .header = &.{ - .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .tmp2y, ._ }, - }, - .body = &.{ - .{ ._, .v_pd, .movu, .tmp1y, .limb(.src0y), ._, ._ }, - .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .limb(.src1y), ._ }, - .{ ._, .v_pd, .@"or", .tmp2y, .tmp2y, .tmp1y, ._ }, - }, - .trailer = &.{ - .{ ._, .vp_, .@"test", .tmp2y, .tmp2y, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .v_pd, .movu, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ }, + .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, } }, }, .{ .required_features = .{ .avx, null }, @@ -5352,7 +5610,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, .unused, @@ -5360,47 +5618,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .src0x, - .header = &.{ - .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ }, - }, - .body = &.{ - .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .limb(.src1x), ._ }, - .{ ._, .vp_, .@"or", .tmp2x, .tmp2x, .tmp1x, ._ }, - }, - .trailer = &.{ - .{ ._, .vp_, .@"test", .tmp2x, .tmp2x, ._, ._ }, - }, - } }, - }, .{ - .required_features = .{ .avx, null }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .sse } }, - .{ .kind = .{ .rc = .sse } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .src0x, - .header = &.{ - .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ }, - }, - .body = &.{ - .{ ._, .v_dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, - .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .limb(.src1x), ._ }, - .{ ._, .vp_, .@"or", .tmp2x, .tmp2x, .tmp1x, ._ }, - }, - .trailer = &.{ - .{ ._, .vp_, .@"test", .tmp2x, .tmp2x, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._ }, + .{ ._, .vp_, .@"or", .tmp1x, .tmp1x, .tmp2x, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ }, } }, }, .{ .required_features = .{ .sse4_1, null }, @@ -5408,7 +5634,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ 
.type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, .unused, @@ -5416,19 +5642,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .src0x, - .header = &.{ - .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, - }, - .body = &.{ - .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, - .{ ._, .p_, .xor, .tmp1x, .limb(.src1x), ._, ._ }, - .{ ._, .p_, .@"or", .tmp2x, .tmp1x, ._, ._ }, - }, - .trailer = &.{ - .{ ._, .p_, .@"test", .tmp2x, .tmp2x, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_, .@"test", .tmp1x, .tmp1x, ._, ._ }, } }, }, .{ .required_features = .{ .sse2, null }, @@ -5436,7 +5658,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, .{ .kind = .{ .rc = .sse } }, .unused, @@ -5444,22 +5666,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .src0x, - .header = &.{ - .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, - }, - .body = &.{ - .{ ._, ._dqu, .mov, .tmp1x, .limb(.src0x), ._, ._ }, - .{ ._, .p_, .xor, .tmp1x, .limb(.src1x), ._, ._ }, - .{ ._, .p_, .@"or", .tmp2x, .tmp1x, ._, ._ }, - }, - .trailer = &.{ - .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, - .{ ._, .p_b, .cmpeq, .tmp2x, .tmp1x, ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp0d, .tmp2x, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .i(0xffff), ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp1x, .tmp2x, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, + .{ ._, ._, .cmp, .tmp0d, .i(0xffff), ._, ._ }, } }, }, .{ .required_features = .{ .sse, .mmx }, @@ -5467,7 +5685,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .mmx } }, .{ .kind = .{ .rc = .mmx } }, .unused, @@ -5475,29 +5693,25 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .src0q, - .header = &.{ - .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ }, - }, - .body = &.{ - .{ ._, ._q, .mov, .tmp1q, .limb(.src0q), ._, ._ }, - .{ ._, .p_, .xor, .tmp1q, .limb(.src1q), ._, ._ }, - .{ ._, .p_, 
.@"or", .tmp2q, .tmp1q, ._, ._ }, - }, - .trailer = &.{ - .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, - .{ ._, .p_b, .cmpeq, .tmp2q, .tmp1q, ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp0d, .tmp2q, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .i(0xff), ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, + .{ .@"0:", ._q, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, .xor, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, .@"or", .tmp1q, .tmp2q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp1q, .tmp2q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, + .{ ._, ._, .cmp, .tmp0d, .i(0xff), ._, ._ }, } }, }, .{ .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .unused, @@ -5505,19 +5719,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, - .each = .{ .limb = .{ - .of = .src0p, - .header = &.{ - .{ ._, ._, .xor, .tmp2p, .tmp2p, ._, ._ }, - }, - .body = &.{ - .{ ._, ._, .mov, .tmp1p, .limb(.src0p), ._, ._ }, - .{ ._, ._, .xor, .tmp1p, .limb(.src1p), ._, ._ }, - .{ ._, ._, .@"or", .tmp2p, .tmp1p, ._, ._ }, - }, - .trailer = &.{ - .{ ._, ._, .@"test", .tmp2p, .tmp2p, ._, ._ }, - }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1p, .tmp1p, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .xor, .tmp2p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .@"or", .tmp1p, .tmp2p, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .a(.tmp2, .add_size), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp1p, .tmp1p, ._, ._ }, } }, } }, }) catch |err| switch (err) { @@ -24063,8 +24273,6 @@ const Select = struct { backward: ?Mir.Inst.Index, forward: [1]?Mir.Inst.Index, }, - limb: Memory.Mod.Rm, - mask_limb: Memory.Mod.Rm, fn emitLabel(s: *Select, label_index: Label) void { if (label_index == ._) return; @@ -24112,23 +24320,6 @@ const Select = struct { dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused), each: union(enum) { once: []const Instruction, - limb: struct { - of: Select.Operand.Ref.Sized, - header: []const Instruction = &.{}, - first: ?[]const Instruction = null, - body: []const Instruction, - last: ?[]const Instruction = null, - trailer: []const Instruction = &.{}, - }, - limb_and_mask_limb: struct { - of: Select.Operand.Ref.Sized, - of_mask: Select.Operand.Ref.Sized, - header: []const Instruction = &.{}, - first: ?[]const Instruction = null, - body: []const Instruction, - last: ?[]const Instruction = null, - trailer: []const Instruction = &.{}, - }, }, }; @@ -24349,26 +24540,20 @@ const Select = struct { backward_label, forward_label, ref, - limb, - mask_limb, simm, lea, mem, }; const Adjust = enum { none, - add_access_size, - sub_access_size, + add_ptr_size, + sub_ptr_size, + add_ptr_bit_size, + sub_ptr_bit_size, add_size, sub_size, - add_bit_size, - sub_bit_size, - add_limbs, - sub_limbs, add_len, sub_len, - add_elem_size, - 
sub_elem_size, add_elem_limbs, sub_elem_limbs, }; @@ -24435,6 +24620,15 @@ const Select = struct { const tmp4x: Sized = .{ .ref = .tmp4, .size = .xword }; const tmp4y: Sized = .{ .ref = .tmp4, .size = .yword }; + const tmp5: Sized = .{ .ref = .tmp5, .size = .none }; + const tmp5b: Sized = .{ .ref = .tmp5, .size = .byte }; + const tmp5w: Sized = .{ .ref = .tmp5, .size = .word }; + const tmp5d: Sized = .{ .ref = .tmp5, .size = .dword }; + const tmp5p: Sized = .{ .ref = .tmp5, .size = .ptr }; + const tmp5q: Sized = .{ .ref = .tmp5, .size = .qword }; + const tmp5x: Sized = .{ .ref = .tmp5, .size = .xword }; + const tmp5y: Sized = .{ .ref = .tmp5, .size = .yword }; + const dst0: Sized = .{ .ref = .dst0, .size = .none }; const dst0b: Sized = .{ .ref = .dst0, .size = .byte }; const dst0w: Sized = .{ .ref = .dst0, .size = .word }; @@ -24515,6 +24709,14 @@ const Select = struct { const tmp4x: Select.Operand = .{ .tag = .ref, .base = .tmp4x }; const tmp4y: Select.Operand = .{ .tag = .ref, .base = .tmp4y }; + const tmp5b: Select.Operand = .{ .tag = .ref, .base = .tmp5b }; + const tmp5w: Select.Operand = .{ .tag = .ref, .base = .tmp5w }; + const tmp5d: Select.Operand = .{ .tag = .ref, .base = .tmp5d }; + const tmp5p: Select.Operand = .{ .tag = .ref, .base = .tmp5p }; + const tmp5q: Select.Operand = .{ .tag = .ref, .base = .tmp5q }; + const tmp5x: Select.Operand = .{ .tag = .ref, .base = .tmp5x }; + const tmp5y: Select.Operand = .{ .tag = .ref, .base = .tmp5y }; + const dst0b: Select.Operand = .{ .tag = .ref, .base = .dst0b }; const dst0w: Select.Operand = .{ .tag = .ref, .base = .dst0w }; const dst0d: Select.Operand = .{ .tag = .ref, .base = .dst0d }; @@ -24539,13 +24741,6 @@ const Select = struct { const src1x: Select.Operand = .{ .tag = .ref, .base = .src1x }; const src1y: Select.Operand = .{ .tag = .ref, .base = .src1y }; - fn limb(ref: Ref.Sized) Select.Operand { - return .{ .tag = .limb, .base = ref }; - } - fn maskLimb(ref: Ref.Sized) Select.Operand { - return .{ .tag = .mask_limb, .base = ref }; - } - fn i(imm: i32) Select.Operand { return .{ .tag = .simm, .imm = imm }; } @@ -24673,28 +24868,14 @@ const Select = struct { fn adjustedImm(op: Select.Operand, s: *const Select) i32 { return switch (op.adjust) { .none => op.imm, - .add_access_size => op.imm + @as(i32, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))), - .sub_access_size => op.imm - @as(i32, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))), + .add_ptr_size => op.imm + @divExact(s.cg.target.ptrBitWidth(), 8), + .sub_ptr_size => op.imm - @divExact(s.cg.target.ptrBitWidth(), 8), + .add_ptr_bit_size => op.imm + s.cg.target.ptrBitWidth(), + .sub_ptr_bit_size => op.imm - s.cg.target.ptrBitWidth(), .add_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))), .sub_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))), - .add_bit_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).bitSize(s.cg.pt.zcu))), - .sub_bit_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).bitSize(s.cg.pt.zcu))), - .add_limbs => op.imm + @as(i32, @intCast(@divExact( - op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu), - @divExact(op.base.size.bitSize(s.cg.target), 8), - ))), - .sub_limbs => op.imm + @as(i32, @intCast(@divExact( - op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu), - @divExact(op.base.size.bitSize(s.cg.target), 8), - ))), .add_len => op.imm + @as(i32, 
@intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))), .sub_len => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))), - .add_elem_size => op.imm + @as(i32, @intCast( - op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), - )), - .sub_elem_size => op.imm - @as(i32, @intCast( - op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), - )), .add_elem_limbs => op.imm + @as(i32, @intCast(@divExact( op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), @divExact(op.base.size.bitSize(s.cg.target), 8), @@ -24726,7 +24907,6 @@ const Select = struct { else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = op.base.size }) }, .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))) }, }, - inline .limb, .mask_limb => |kind| .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, @field(s, @tagName(kind))) }, .simm => .{ .imm = .s(op.adjustedImm(s)) }, .lea => .{ .mem = .{ .base = .{ .reg = registerAlias(op.base.ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)) }, @@ -24775,8 +24955,6 @@ fn select( .cg = cg, .temps = undefined, .labels = @splat(.{ .forward = @splat(null), .backward = null }), - .limb = undefined, - .mask_limb = undefined, }; const tmp_slots = s.temps[@intFromEnum(Select.Operand.Ref.tmp0)..@intFromEnum(Select.Operand.Ref.dst0)]; const dst_slots = s.temps[@intFromEnum(Select.Operand.Ref.dst0)..@intFromEnum(Select.Operand.Ref.src0)]; @@ -24797,74 +24975,11 @@ fn select( @memcpy(dst_slots[0..dst_temps.len], dst_temps); switch (case.each) { - .once => |body| for (body) |inst| try s.emit(inst), - .limb => |limb| { - const limb_of_size: i32 = @intCast(limb.of.ref.deref(&s).typeOf(cg).abiSize(cg.pt.zcu)); - s.limb = .{ - .size = limb.of.size, - .index = (try Select.Operand.tmp0p.lower(&s)).reg, - .disp = limb_of_size, - }; - for (limb.header) |inst| try s.emit(inst); - try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ }); - assert(s.labels[0].backward == null); - s.labels[0].backward = @intCast(cg.mir_instructions.len); - for (limb.body) |inst| try s.emit(inst); - try s.emit(.{ ._, ._, .add, .tmp0p, .i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ }); - try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ }); - for (limb.trailer) |inst| try s.emit(inst); - }, - .limb_and_mask_limb => |limb| { - const limb_of_size: i32 = @intCast(limb.of.ref.deref(&s).typeOf(cg).abiSize(cg.pt.zcu)); - s.limb = .{ - .size = limb.of.size, - .index = (try Select.Operand.tmp0p.lower(&s)).reg, - .disp = limb_of_size, - }; - const mask_limb_bit_size: u31 = @intCast(@divExact( - limb.of.size.bitSize(cg.target), - limb.of_mask.size.bitSize(cg.target), - )); - if (mask_limb_bit_size >= 8) { - s.mask_limb = .{ - .size = .fromBitSize(mask_limb_bit_size), - .index = (try Select.Operand.tmp1p.lower(&s)).reg, - }; - for (limb.header) |inst| try s.emit(inst); - try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ }); - try s.emit(.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }); - assert(s.labels[0].backward == null); - s.labels[0].backward = @intCast(cg.mir_instructions.len); - for (limb.body) |inst| try s.emit(inst); - try s.emit(.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, @divExact(mask_limb_bit_size, 8)), ._, ._ }); - try s.emit(.{ ._, ._, .add, .tmp0p, .i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ }); - try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ }); - 
for (limb.trailer) |inst| try s.emit(inst); - } else { - for (limb.header) |inst| try s.emit(inst); - try s.emit(.{ ._, ._, .mov, .tmp0p, .i(-limb_of_size), ._, ._ }); - try s.emit(.{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }); - try s.emit(.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }); - assert(s.labels[0].backward == null); - s.labels[0].backward = @intCast(cg.mir_instructions.len); - for (limb.body) |inst| try s.emit(inst); - try s.emit(.{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, mask_limb_bit_size), ._, ._ }); - try s.emit(.{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }); - try s.emit(.{ ._, ._nz, .j, .@"1f", ._, ._, ._ }); - try s.emit(.{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }); - try s.emit(.{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }); - try s.emit(.{ ._, ._, .mov, .memid(.{ .ref = limb.of_mask.ref, .size = .byte }, .tmp3, -1), .tmp2b, ._, ._ }); - try s.emit(.{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }); - try s.emit(.{ .@"1:", ._, .add, .tmp0p, .i(@intCast(@divExact(limb.of.size.bitSize(cg.target), 8))), ._, ._ }); - try s.emit(.{ ._, ._nc, .j, .@"0b", ._, ._, ._ }); - try s.emit(.{ ._, ._, .lea, .tmp3d, .lead(.none, .tmp1, -1), ._, ._ }); - try s.emit(.{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }); - try s.emit(.{ ._, ._, .mov, .memi(.{ .ref = limb.of_mask.ref, .size = .byte }, .tmp3), .tmp2b, ._, ._ }); - for (limb.trailer) |inst| try s.emit(inst); - } + .once => |body| { + for (body) |inst| try s.emit(inst); + s.emitLabel(.@"0:"); }, } - s.emitLabel(.@"0:"); for (dst_temps, case.dst_temps[0..dst_temps.len]) |dst_temp, dst_kind| dst_kind.finish(dst_temp, &s); for (case.extra_temps, tmp_slots) |spec, temp| if (spec.kind != .unused) try temp.die(cg); From ac1a975f9b5a7d939663fa90556a2f038250c531 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 28 Dec 2024 22:51:41 -0500 Subject: [PATCH 09/25] x86_64: implement clz and not --- lib/std/Target/Query.zig | 8 +- lib/std/Target/x86.zig | 65 + lib/std/math/big/int.zig | 13 +- lib/std/zig/system/x86.zig | 2 + src/arch/x86_64/CodeGen.zig | 6366 +++++++++++++++++++++++++------- src/arch/x86_64/Encoding.zig | 17 +- src/arch/x86_64/Lower.zig | 30 +- src/arch/x86_64/Mir.zig | 34 +- src/arch/x86_64/bits.zig | 12 +- src/arch/x86_64/encodings.zig | 238 +- src/dev.zig | 1 + src/link/Elf/Atom.zig | 37 +- src/link/MachO/Atom.zig | 12 +- test/behavior/math.zig | 9 +- test/behavior/vector.zig | 2 +- test/behavior/x86_64.zig | 3 +- test/behavior/x86_64/build.zig | 114 + test/behavior/x86_64/math.zig | 763 ++++ tools/update_cpu_features.zig | 138 +- 19 files changed, 6440 insertions(+), 1424 deletions(-) create mode 100644 test/behavior/x86_64/build.zig diff --git a/lib/std/Target/Query.zig b/lib/std/Target/Query.zig index 50db1fed5e..56387c27b3 100644 --- a/lib/std/Target/Query.zig +++ b/lib/std/Target/Query.zig @@ -6,13 +6,13 @@ /// `null` means native. cpu_arch: ?Target.Cpu.Arch = null, -cpu_model: CpuModel = CpuModel.determined_by_arch_os, +cpu_model: CpuModel = .determined_by_arch_os, /// Sparse set of CPU features to add to the set from `cpu_model`. -cpu_features_add: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty, +cpu_features_add: Target.Cpu.Feature.Set = .empty, /// Sparse set of CPU features to remove from the set from `cpu_model`. -cpu_features_sub: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty, +cpu_features_sub: Target.Cpu.Feature.Set = .empty, /// `null` means native. os_tag: ?Target.Os.Tag = null, @@ -38,7 +38,7 @@ abi: ?Target.Abi = null, /// When `os_tag` is `null`, then `null` means native. 
Otherwise it means the standard path /// based on the `os_tag`. -dynamic_linker: Target.DynamicLinker = Target.DynamicLinker.none, +dynamic_linker: Target.DynamicLinker = .none, /// `null` means default for the cpu/arch/os combo. ofmt: ?Target.ObjectFormat = null, diff --git a/lib/std/Target/x86.zig b/lib/std/Target/x86.zig index 86c334afda..76dddb222d 100644 --- a/lib/std/Target/x86.zig +++ b/lib/std/Target/x86.zig @@ -47,6 +47,7 @@ pub const Feature = enum { bmi2, branch_hint, branchfusion, + bsf_bsr_0_clobbers_result, ccmp, cf, cldemote, @@ -167,6 +168,8 @@ pub const Feature = enum { slow_unaligned_mem_32, sm3, sm4, + smap, + smep, soft_float, sse, sse2, @@ -497,6 +500,11 @@ pub const all_features = blk: { .description = "CMP/TEST can be fused with conditional branches", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.bsf_bsr_0_clobbers_result)] = .{ + .llvm_name = null, + .description = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.ccmp)] = .{ .llvm_name = "ccmp", .description = "Support conditional cmp & test instructions", @@ -1127,6 +1135,16 @@ pub const all_features = blk: { .avx2, }), }; + result[@intFromEnum(Feature.smap)] = .{ + .llvm_name = null, + .description = "Enable Supervisor Mode Access Prevention", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.smep)] = .{ + .llvm_name = null, + .description = "Enable Supervisor Mode Execution Prevention", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.soft_float)] = .{ .llvm_name = "soft-float", .description = "Use software floating point features", @@ -1371,6 +1389,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -1467,6 +1487,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .uintr, .vaes, @@ -1545,6 +1567,8 @@ pub const cpu = struct { .slow_3ops_lea, .sm3, .sm4, + .smap, + .smep, .tuning_fast_imm_vector_shift, .uintr, .vaes, @@ -1783,6 +1807,8 @@ pub const cpu = struct { .sahf, .sbb_dep_breaking, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .x87, @@ -1995,6 +2021,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -2136,6 +2164,8 @@ pub const cpu = struct { .sahf, .sha, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -2195,6 +2225,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -2450,6 +2482,8 @@ pub const cpu = struct { .serialize, .sha, .shstk, + .smap, + .smep, .tsxldtrk, .tuning_fast_imm_vector_shift, .uintr, @@ -2519,6 +2553,8 @@ pub const cpu = struct { .slow_incdec, .slow_lea, .slow_two_mem_ops, + .smap, + .smep, .sse4_2, .use_glm_div_sqrt_costs, .vzeroupper, @@ -2898,6 +2934,7 @@ pub const cpu = struct { .rdrnd, .sahf, .slow_3ops_lea, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -2907,6 +2944,7 @@ pub const cpu = struct { .name = "i386", .llvm_name = "i386", .features = featureSet(&[_]Feature{ + .bsf_bsr_0_clobbers_result, .slow_unaligned_mem_16, .vzeroupper, .x87, @@ -2916,6 +2954,7 @@ pub const cpu = struct { .name = "i486", .llvm_name = "i486", .features = featureSet(&[_]Feature{ + .bsf_bsr_0_clobbers_result, .slow_unaligned_mem_16, .vzeroupper, .x87, @@ -3096,6 +3135,7 @@ pub const cpu = 
struct { .sahf, .slow_3ops_lea, .slow_unaligned_mem_32, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -3403,6 +3443,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -3766,6 +3808,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -3831,6 +3875,8 @@ pub const cpu = struct { .rdseed, .sahf, .sha, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -3939,6 +3985,8 @@ pub const cpu = struct { .serialize, .sha, .shstk, + .smap, + .smep, .tsxldtrk, .tuning_fast_imm_vector_shift, .uintr, @@ -4042,6 +4090,7 @@ pub const cpu = struct { .slow_lea, .slow_pmulld, .slow_two_mem_ops, + .smep, .sse4_2, .use_slm_arith_costs, .vzeroupper, @@ -4098,6 +4147,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -4150,6 +4201,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .vzeroupper, .x87, .xsavec, @@ -4305,6 +4358,8 @@ pub const cpu = struct { .sahf, .sha, .shstk, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -4574,6 +4629,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .x87, @@ -4629,6 +4686,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .wbnoinvd, @@ -4686,6 +4745,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, @@ -4757,6 +4818,8 @@ pub const cpu = struct { .sha, .shstk, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, @@ -4833,6 +4896,8 @@ pub const cpu = struct { .sha, .shstk, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 691ae02280..98d37d8994 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -2520,12 +2520,13 @@ pub const Const = struct { return order(a, b) == .eq; } + /// Returns the number of leading zeros in twos-complement form. pub fn clz(a: Const, bits: Limb) Limb { - // Limbs are stored in little-endian order but we need - // to iterate big-endian. + // Limbs are stored in little-endian order but we need to iterate big-endian. + if (!a.positive) return 0; var total_limb_lz: Limb = 0; var i: usize = a.limbs.len; - const bits_per_limb = @sizeOf(Limb) * 8; + const bits_per_limb = @bitSizeOf(Limb); while (i != 0) { i -= 1; const limb = a.limbs[i]; @@ -2537,13 +2538,15 @@ pub const Const = struct { return total_limb_lz + bits - total_limb_bits; } + /// Returns the number of trailing zeros in twos-complement form. pub fn ctz(a: Const, bits: Limb) Limb { - // Limbs are stored in little-endian order. + // Limbs are stored in little-endian order. Converting a negative number to twos-complement + // flips all bits above the lowest set bit, which does not affect the trailing zero count. 
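+        // For example, with 8 bits, -12 is 0b11110100 in twos-complement and 12 is 0b00001100;
+        // both have exactly two trailing zeros, so the count can be taken from the stored magnitude limbs.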
var result: Limb = 0; for (a.limbs) |limb| { const limb_tz = @ctz(limb); result += limb_tz; - if (limb_tz != @sizeOf(Limb) * 8) break; + if (limb_tz != @bitSizeOf(Limb)) break; } return @min(result, bits); } diff --git a/lib/std/zig/system/x86.zig b/lib/std/zig/system/x86.zig index 7bd1148e13..2737c67d0c 100644 --- a/lib/std/zig/system/x86.zig +++ b/lib/std/zig/system/x86.zig @@ -369,6 +369,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .bmi, bit(leaf.ebx, 3)); // AVX2 is only supported if we have the OS save support from AVX. setFeature(cpu, .avx2, bit(leaf.ebx, 5) and has_avx_save); + setFeature(cpu, .smep, bit(leaf.ebx, 7)); setFeature(cpu, .bmi2, bit(leaf.ebx, 8)); setFeature(cpu, .invpcid, bit(leaf.ebx, 10)); setFeature(cpu, .rtm, bit(leaf.ebx, 11)); @@ -377,6 +378,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save); setFeature(cpu, .rdseed, bit(leaf.ebx, 18)); setFeature(cpu, .adx, bit(leaf.ebx, 19)); + setFeature(cpu, .smap, bit(leaf.ebx, 20)); setFeature(cpu, .avx512ifma, bit(leaf.ebx, 21) and has_avx512_save); setFeature(cpu, .clflushopt, bit(leaf.ebx, 23)); setFeature(cpu, .clwb, bit(leaf.ebx, 24)); diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 6f3ec618fe..d2d1fedb6f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1390,7 +1390,7 @@ fn asmOps(self: *CodeGen, tag: Mir.Inst.FixedTag, ops: [4]Operand) !void { /// A `cc` of `.z_and_np` clobbers `reg2`! fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2: Register) !void { - _ = try self.addInst(.{ + if (self.hasFeature(.cmov)) _ = try self.addInst(.{ .tag = switch (cc) { else => .cmov, .z_and_np, .nz_or_p => .pseudo, @@ -1408,12 +1408,16 @@ fn asmCmovccRegisterRegister(self: *CodeGen, cc: Condition, reg1: Register, reg2 .r1 = reg1, .r2 = reg2, } }, - }); + }) else { + const reloc = try self.asmJccReloc(cc.negate(), undefined); + try self.asmRegisterRegister(.{ ._, .mov }, reg1, reg2); + self.performReloc(reloc); + } } /// A `cc` of `.z_and_np` is not supported by this encoding! 
fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memory) !void { - _ = try self.addInst(.{ + if (self.hasFeature(.cmov)) _ = try self.addInst(.{ .tag = switch (cc) { else => .cmov, .z_and_np => unreachable, @@ -1433,7 +1437,11 @@ fn asmCmovccRegisterMemory(self: *CodeGen, cc: Condition, reg: Register, m: Memo .r1 = reg, .payload = try self.addExtra(Mir.Memory.encode(m)), } }, - }); + }) else { + const reloc = try self.asmJccReloc(cc.negate(), undefined); + try self.asmRegisterMemory(.{ ._, .mov }, reg, m); + self.performReloc(reloc); + } } fn asmSetccRegister(self: *CodeGen, cc: Condition, reg: Register) !void { @@ -2319,6 +2327,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { + @setEvalBranchQuota(1_600); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2354,9 +2363,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1); switch (air_tags[@intFromEnum(inst)]) { // zig fmt: off - .not, - => |air_tag| try cg.airUnOp(inst, air_tag), - .add, .add_wrap, .sub, @@ -2434,7 +2440,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .memset_safe => try cg.airMemset(inst, true), .set_union_tag => try cg.airSetUnionTag(inst), .get_union_tag => try cg.airGetUnionTag(inst), - .clz => try cg.airClz(inst), .ctz => try cg.airCtz(inst), .popcount => try cg.airPopCount(inst), .byte_swap => try cg.airByteSwap(inst), @@ -2525,146 +2530,156 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{ - .required_features = .{ .avx2, null }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc = .sse }}, .each = .{ .once = &.{ .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .int_or_vec 
= .xword }, .{ .int_or_vec = .xword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null }, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, .dst_temps = .{.{ .ref = .src0 }}, .each = .{ .once = &.{ .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .int_or_vec = .byte }, .{ .int_or_vec = .byte } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm8 } }, .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm8 } }, - .{ .src = .{ .imm8, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm8 } }, + .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .int_or_vec = .word }, .{ .int_or_vec = .word } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm16 } }, .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm16 } }, - .{ .src = .{ .imm16, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm16 } }, + .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ 
.to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .int_or_vec = .dword }, .{ .int_or_vec = .dword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm32 } }, .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm32 } }, - .{ .src = .{ .imm32, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm32 } }, + .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, mir_tag, .dst0d, .src1d, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .simm32 } }, .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .simm32 } }, - .{ .src = .{ .simm32, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .simm32 } }, + .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2677,16 +2692,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + 
.required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2699,16 +2719,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", .v_pd, .movu, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .v_pd, mir_tag, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_pd, .movu, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2721,16 +2746,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_, mir_tag, .tmp1x, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .v_dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2743,16 +2773,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, ._dqu, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null }, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2765,16 +2800,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, 
.{ .@"0:", ._ps, .movu, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, ._ps, mir_tag, .tmp1x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, ._ps, .movu, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2787,15 +2827,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._q, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, mir_tag, .tmp1q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, .{ ._, ._q, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .src_constraints = .{ + .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, + .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2808,16 +2853,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, mir_tag, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, } }, - }) catch |err2| switch (err2) { + }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), cg.typeOf(bin_op.lhs).fmt(pt), @@ -2830,6 +2876,1058 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { if (ops[1].index != res[0].index) try ops[1].die(cg); try res[0].moveTo(inst, cg); }, + .not => |air_tag| if (use_old) try cg.airUnOp(inst, air_tag) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + .src_constraints = .{ .{ .signed_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .not, .dst0b, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ 
.signed_or_exact_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .not, .dst0w, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_or_exact_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .not, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .not, .dst0q, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_mm, .none } }, + }, + .dst_temps = .{.{ .rc = .mmx }}, + .each = .{ .once = &.{ + .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ }, + .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_mm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_xmm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + 
.unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_xmm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ }, + .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_ymm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_int = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_ymm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, .cmp, .dst0y, .dst0y, .dst0y, .si(0b01111) }, + .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + 
.src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ }, + .{ ._, .v_dqu, .mov, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ .@"0:", .vp_, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ }, + .{ ._, .v_dqa, .mov, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .vp_, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ }, + .{ ._, .v_dqu, .mov, .memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) }, + .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memiad(.src0y, .tmp0, .add_size, -16), ._ }, + .{ ._, .v_pd, .movu, .memiad(.dst0y, .tmp0, .add_size, -16), .tmp2y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ .@"0:", .v_pd, .xor, .tmp2x, .tmp1x, .memad(.src0x, .add_size, -16), ._ }, + .{ ._, .v_pd, .mova, .memad(.dst0x, .add_size, -16), .tmp2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .yword, .is = .yword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, .v_pd, .cmp, .tmp1y, .tmp1y, .tmp1y, .si(0b01111) }, + .{ .@"0:", .v_pd, .xor, .tmp2y, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._ }, + .{ ._, .v_pd, .movu, 
.memia(.dst0y, .tmp0, .add_size), .tmp2y, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, .vp_q, .cmpeq, .tmp1x, .tmp1x, .tmp1x, ._ }, + .{ .@"0:", .v_, .xor, .tmp2x, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._ }, + .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, .p_d, .cmpeq, .tmp1x, .tmp1x, ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .tmp1x, ._, ._ }, + .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp2x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memia(.dst0q, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + 
.src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .not, .memad(.dst0d, .add_size, -16), ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .xword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ }, + .{ ._, ._, .not, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .not, .memad(.dst0q, .add_size, -16), ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .qword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, 
._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .not, .memad(.dst0d, .add_size, -8), ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_remainder_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .memad(.dst0d, .add_size, -16), .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, + 
.{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -16), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16), .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -16 + 4), .si(0), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16 + 8), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .memad(.dst0d, .add_size, -8), .sa(.src0, .add_umax), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .qword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .memad(.src0d, .add_size, -8), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8), .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0d, .add_size, -8 + 4), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16), ._, ._, ._ }, + .{ ._, ._, .not, .memiad(.dst0q, .tmp0, .add_size, -16 + 8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + 
.unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -16), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -16), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -16), .tmp0q, ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .si(0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .not, .memiad(.dst0q, .tmp0, .add_size, -8), ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(8, .src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .memiad(.src0q, .tmp0, .add_size, -8), ._, ._ }, + .{ ._, ._, .not, .tmp1q, ._, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp0, .add_size, -8), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .xor, .tmp0q, .memad(.src0q, .add_size, -8), ._, ._ }, + .{ ._, ._, .mov, .memad(.dst0q, .add_size, -8), .tmp0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_mm, .none } }, + }, + .dst_temps = .{.{ .rc = .mmx }}, + .each = .{ .once = &.{ + .{ ._, .p_d, .cmpeq, .dst0q, .dst0q, ._, ._ }, + .{ ._, .p_, .xor, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_mm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .xor, .dst0q, .lea(.qword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null 
}, + .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_xmm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_q, .cmpeq, .dst0x, .dst0x, .dst0x, ._ }, + .{ ._, .vp_, .xor, .dst0x, .dst0x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .xor, .dst0x, .src0x, .lea(.xword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_xmm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .p_d, .cmpeq, .dst0x, .dst0x, ._, ._ }, + .{ ._, .p_, .xor, .dst0x, .src0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .p_, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .vec = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, ._ps, .xor, .dst0x, .lea(.xword, .tmp0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_ymm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_q, .cmpeq, .dst0y, .dst0y, .dst0y, ._ }, + .{ ._, .vp_, .xor, .dst0y, .dst0y, .src0y, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .vp_, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .signed_int_or_full_vec = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_ymm, .none } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, .cmp, .dst0y, .dst0y, 
.dst0y, .si(0b01111) }, + .{ ._, .v_pd, .xor, .dst0y, .dst0y, .src0y, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .unsigned_int_vec = .yword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .none } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.tmp1), ._, ._ }, + .{ ._, .v_pd, .xor, .dst0y, .src0y, .lea(.yword, .tmp0), ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_src0_size), ._, ._ }, + .{ ._, ._, .xor, .tmp2q, .leaia(.qword, .tmp1, .tmp0, .add_src0_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0q, .tmp0, .add_src0_size), .tmp2q, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .umax_mem = .src0 } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_src0_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp3), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_src0_size), ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .leaia(.dword, .tmp1, .tmp0, .add_src0_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0d, .tmp0, .add_src0_size), .tmp2d, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ + @tagName(air_tag), + cg.typeOf(ty_op.operand).fmt(pt), + ops[0].tracking(cg), + }), + else => |e| return e, + }; + if (ops[0].index != res[0].index) try ops[0].die(cg); + try res[0].moveTo(inst, cg); + }, + .block => if (use_old) try cg.airBlock(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = cg.air.extraData(Air.Block, ty_pl.payload); @@ -2880,6 +3978,2675 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_never_tail => try cg.airCall(inst, .never_tail), .call_never_inline => try cg.airCall(inst, .never_inline), + .clz => |air_tag| if (use_old) try cg.airClz(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .exact_signed_int = 1 }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = 
.{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .add, .dst0b, .si(1), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_signed_int = 1 }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .inc, .dst0b, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .exact_unsigned_int = 1 }, .any }, + .patterns = &.{ + .{ .src = .{ .mut_mem, .none } }, + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .si(1), ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, + .src_constraints = .{ .{ .exact_int = 16 }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .exact_int = 16 }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .signed_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ 
.src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(16, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, + .src_constraints = .{ .{ .exact_int = 32 }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .exact_int = 32 }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .false_deps_lzcnt_tzcnt, .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .exact_int = 64 }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .exact_int = 64 }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .dst0q, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(64, 
.src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lzcnt, .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .sub, .dst0b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._z, .cmov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, 
.none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ 
.unsigned_po2_or_exact_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_po2_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .tmp0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .@"and", .dst0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src0b, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._nz, .cmov, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ 
.eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0w, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0w, .dst0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .signed_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0w, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0w, .src0w, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0w, .sa(.src0, .add_umax), ._, ._ 
}, + .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0w, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0w, .src0w, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, 
.src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .@"and", .src0d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0d, .src0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._nz, .cmov, .dst0d, .src0d, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ 
.once = &.{ + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ }, + .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(0xff), ._, ._ }, + .{ ._, ._z, .cmov, .src0d, .dst0d, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp0q, .src0q, ._, ._ }, + .{ ._, ._r, .bs, .tmp0q, .tmp0q, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._r, .bs, .src0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .dst0b, .src0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_po2_or_exact_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .sia(-1, .src0, .add_2_bit_size), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .signed_int = .qword }, .any }, + 
.patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .dst0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0q, .dst0q, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .none } }, + .{ .src = .{ .to_gpr, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp0q, .src0q, ._, ._ }, + .{ ._, ._, .mov, .dst0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .dst0b, .tmp0b, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + 
.unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ 
.unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .si(-1), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_or_exact_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ 
}, + .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .@"and", .tmp1q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._, .lzcnt, .dst0q, .tmp1q, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, 
.tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .neg, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .add_src0_bit_size, -64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"0:", ._, .@"and", .dst0q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._r, .bs, .dst0q, .dst0q, ._, ._ }, + .{ ._, ._nz, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ .@"0:", ._, .lea, .dst0d, .leasiad(.none, .dst0, .@"8", .tmp0, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .dst0d, ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, 
+ .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .lzcnt, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(32, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, 
.tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .sia(64, .src0, .sub_bit_size), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp2d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, 
._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .scalar_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + 
.unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp2d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, 
.memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .scalar_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, 
.tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .cmov, .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp2d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .bsf_bsr_0_clobbers_result, null, null, null }, 
+ .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._r, .bs, .tmp1d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .slow_incdec, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .scalar_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, .slow_incdec }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ 
.once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .cmov, .bsf_bsr_0_clobbers_result, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp2q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._r, .bs, .tmp2q, .tmp2q, ._, ._ }, + .{ ._, ._z, .cmov, .tmp2d, .tmp1d, ._, ._ }, + .{ ._, ._, .mov, .tmp3b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp3b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, .slow_incdec, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bsf_bsr_0_clobbers_result, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", 
.tmp0, .add_8_len), ._, ._ }, + .{ ._, ._r, .bs, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp2b, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._c, .st, ._, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp2b, .tmp1b, ._, ._ }, + .{ .@"1:", ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp2b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .@"and", .tmp1q, .memsia(.src0q, .@"8", .tmp0, .add_8_len), ._, ._ }, + .{ ._, ._, .mov, .tmp2d, .si(0xff), ._, ._ }, + .{ ._, ._r, .bs, .tmp2q, .tmp1q, ._, ._ }, + .{ ._, ._, .mov, .tmp1b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .sub, .tmp1b, .tmp2b, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp1b, ._, ._ }, + .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, 
._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, + 
.{ ._, ._, .neg, .tmp3b, ._, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, 
.j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .scalar_int = .byte }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .tmp3b, ._, ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0b, .tmp0, .add_len), .tmp3b, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, 
.none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-16, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .false_deps_lzcnt_tzcnt, .lzcnt, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose 
} }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .lzcnt, null, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._, .lzcnt, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nc, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .neg, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3d, .leasiad(.none, .tmp4, .@"8", .tmp2, .add_src0_bit_size, -64), ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .scalar_int = .word }}, + .src_constraints = .{ .{ .scalar_remainder_int = .{ .of = .xword, .is = .xword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sia(-8, .none, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, 
.add_umax), ._, ._ }, + .{ .@"1:", ._, .@"and", .tmp3q, .leai(.qword, .tmp1, .tmp2), ._, ._ }, + .{ ._, ._r, .bs, .tmp3q, .tmp3q, ._, ._ }, + .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .si(-1), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"1b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp3d, .leasiad(.none, .tmp3, .@"8", .tmp2, .sub_src0_bit_size, 1), ._, ._ }, + .{ ._, ._, .neg, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0w, .@"2", .tmp0, .add_2_len), .tmp3w, ._, ._ }, + .{ ._, ._, .lea, .tmp1q, .leaa(.none, .tmp1, .add_src0_elem_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {}", .{ + @tagName(air_tag), + cg.typeOf(ty_op.operand).fmt(pt), + ops[0].tracking(cg), + }), + else => |e| return e, + }; + if (ops[0].index != res[0].index) try ops[0].die(cg); + try res[0].moveTo(inst, cg); + }, + .cmp_vector, .cmp_vector_optimized => |air_tag| if (use_old) try cg.airCmpVector(inst) else fallback: { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data; @@ -2899,12 +6666,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .e, .ne => |cc| comptime &.{ .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2919,12 +6686,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2939,12 +6706,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_w, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2959,12 +6726,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .{ ._, .vp_d, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2979,12 +6746,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_q, .cmpeq, .dst0y, .src0y, .src1y, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -2999,12 +6766,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -3019,12 +6786,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_w, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, @@ -3039,12 +6806,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_d, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, .dst_temps = .{.{ .rc_mask = .{ .rc = .sse, .info = .{ .kind = .all, 
@@ -3059,12 +6826,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_q, .cmpeq, .dst0x, .src0x, .src1x, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3079,12 +6846,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_b, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3099,12 +6866,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_w, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3119,12 +6886,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_d, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse4_1, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3139,12 +6906,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_q, .cmpeq, .dst0x, .src1x, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ 
.to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3159,12 +6926,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_b, .cmpeq, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3179,12 +6946,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_w, .cmpeq, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .mmx, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, .dst_temps = .{.{ .ref_mask = .{ .ref = .src0, .info = .{ .kind = .all, @@ -3203,16 +6970,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mut_mem, .imm8 } }, .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm8 } }, - .{ .src = .{ .imm8, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm8 } }, + .{ .src = .{ .imm8, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ @@ -3228,16 +6995,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mut_mem, .imm16 } }, .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm16 } }, - .{ .src = .{ .imm16, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm16 } }, + .{ .src = .{ .imm16, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr 
} }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ @@ -3253,16 +7020,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mut_mem, .imm32 } }, .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .imm32 } }, - .{ .src = .{ .imm32, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .imm32 } }, + .{ .src = .{ .imm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ @@ -3274,21 +7041,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .bool_vec = .qword }, .{ .bool_vec = .qword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .simm32 } }, .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .simm32 } }, - .{ .src = .{ .simm32, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mem, .gpr } }, - .{ .src = .{ .gpr, .mut_mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .mem } }, - .{ .src = .{ .mem, .mut_gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_gpr, .gpr } }, + .{ .src = .{ .to_mut_gpr, .simm32 } }, + .{ .src = .{ .simm32, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mut_mem, .to_gpr } }, + .{ .src = .{ .to_gpr, .mut_mem }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .mem } }, + .{ .src = .{ .mem, .to_mut_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ @@ -3304,7 +7071,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, @@ -3314,29 +7080,30 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .not, .tmp1p, ._, ._, ._ }, .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, 
.a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .xor, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0p, .tmp0, .add_size), .tmp1p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3349,21 +7116,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_b, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, @@ -3371,13 +7139,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0d, .tmp1), .tmp2d, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3390,10 +7158,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, @@ -3401,11 +7170,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3y, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, 
._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, @@ -3414,13 +7183,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3433,21 +7202,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_ps, .movmsk, .tmp2d, .tmp3y, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_d, .cmpeq, .tmp3y, .tmp3y, .memia(.src1y, .tmp0, .add_size), ._ }, @@ -3455,13 +7225,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3474,10 +7244,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, @@ -3486,49 +7257,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, 
._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_q, .cmpeq, .tmp4y, .tmp4y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4y, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3541,21 +7312,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, 
.si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_b, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, @@ -3563,13 +7335,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3582,10 +7354,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, @@ -3593,11 +7366,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .vp_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_w, .cmpeq, .tmp3x, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._ }, @@ -3606,13 +7379,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3625,10 +7398,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, @@ -3637,49 
+7411,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_d, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .v_ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3692,10 +7466,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, @@ -3704,49 +7479,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, 
._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_q, .cmpeq, .tmp4x, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .v_pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3759,21 +7534,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, 
.tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, @@ -3781,13 +7557,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0w, .tmp1), .tmp2w, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3800,10 +7576,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, @@ -3811,11 +7588,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_b, .movmsk, .tmp2d, .tmp3x, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp3x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_w, .cmpeq, .tmp3x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, @@ -3824,13 +7601,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3843,10 +7620,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, @@ -3855,49 
+7633,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_d, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, ._ps, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b1111), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b1111), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse4_1, null }, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3910,10 +7688,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, @@ -3922,49 +7701,49 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, 
._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp4x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_q, .cmpeq, .tmp4x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, ._pd, .movmsk, .tmp3d, .tmp4x, ._, ._ }, - .{ ._, ._, .xor, .tmp3b, .i(0b11), ._, ._ }, + .{ ._, ._, .xor, .tmp3b, .si(0b11), ._, ._ }, .{ ._, ._l, .ro, .tmp3b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp3b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp3, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp3), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse, .mmx }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3977,21 +7756,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .movmsk, .tmp2d, .tmp3q, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), 
._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._q, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, @@ -3999,13 +7779,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .not, .tmp2b, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp2b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse, .mmx }, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4018,10 +7798,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .kind = .{ .rc = .mmx } }, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, @@ -4032,22 +7813,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, @@ -4055,28 +7836,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_w, .cmpeq, .tmp5q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, - .{ ._, ._, .xor, .tmp4b, .i(0b1111), ._, ._ }, + .{ ._, ._, .xor, .tmp4b, .si(0b1111), ._, ._ }, .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 4), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", 
._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .sse, .mmx }, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4089,10 +7870,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .kind = .{ .rc = .mmx } }, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = switch (cc) { else => unreachable, .e => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, @@ -4104,22 +7886,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, .ne => &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, .{ ._, .p_, .xor, .tmp3q, .tmp3q, ._, ._ }, @@ -4128,29 +7910,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .p_w, .ackssd, .tmp5q, .tmp3q, ._, ._ }, .{ ._, .p_b, .ackssw, .tmp5q, .tmp3q, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp4d, .tmp5q, ._, ._ }, - .{ ._, ._, .xor, .tmp4b, .i(0b11), ._, ._ }, + .{ ._, ._, .xor, .tmp4b, .si(0b11), ._, ._ }, .{ ._, ._l, .ro, .tmp4b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2b, .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1d, .lead(.none, .tmp1, 2), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - 
.{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memid(.dst0b, .tmp4, -1), .tmp2b, ._, ._ }, .{ ._, ._, .xor, .tmp2b, .tmp2b, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .i(0b111), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .si(0b111), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp4d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp4d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp4d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0b, .tmp4), .tmp2b, ._, ._ }, }, } }, }, .{ - .required_features = .{ .slow_incdec, null }, .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4163,51 +7944,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4220,51 +7973,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, 
._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4277,178 +8002,88 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, + 
.{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .bool_vec = .byte }}, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", null }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, 
._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .byte }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ }, .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ }, .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .dst_constraints = .{.{ .bool_vec = .byte }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0b, .dst0b, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, - .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, - .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, - .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, - .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2b, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, - .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int 
= .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4461,9 +8096,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, @@ -4471,43 +8107,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1d, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4520,9 +8126,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, @@ -4530,43 +8137,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .dst_constraints = .{.{ .bool_vec = .dword }}, - 
.src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4579,9 +8156,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, @@ -4589,43 +8167,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, - .{ ._, ._nc, .j, 
.@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4638,9 +8187,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, @@ -4648,115 +8198,50 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, .dst_constraints = .{.{ .bool_vec = .dword }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .dst_constraints = .{.{ .bool_vec = .dword }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ }, .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, .{ ._, ._, .xor, .tmp4p, 
.memi(.src1p, .tmp0), ._, ._ }, .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ }, .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0d, .tmp2d, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .dst_constraints = .{.{ .bool_vec = .dword }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, - .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, - .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, - .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, - .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, - .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2d, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp2b, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, - .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4769,9 +8254,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, @@ -4779,44 +8265,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, 
.si(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .inc, .tmp0p, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, + .src_constraints = .{ .{ .scalar_int = .word }, .{ .scalar_int = .word } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4829,53 +8285,24 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3w, .memia(.src0w, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3w, .memia(.src1w, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, 
._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, + .src_constraints = .{ .{ .scalar_int = .dword }, .{ .scalar_int = .dword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4888,9 +8315,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, @@ -4898,44 +8326,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(4), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp3d, .memia(.src0d, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp3d, .memia(.src1d, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(4), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .src_constraints = .{ .{ .scalar_int = .qword }, .{ .scalar_int = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -4948,9 +8346,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .mov, 
.tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, @@ -4958,42 +8357,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, - .unused, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", .slow_incdec }, + .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .bool_vec = .qword }}, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, @@ -5007,66 +8376,30 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2d, .sa(.src0p, .add_elem_limbs), ._, ._ }, .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .sub, .tmp2d, .i(1), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp4, .add_size), ._, ._ }, + .{ ._, ._, .sub, .tmp2d, .si(1), ._, ._ }, .{ ._, ._b, .j, .@"1b", ._, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp1b, .i(1), ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, + .{ ._, ._, .add, .tmp1b, .si(1), ._, ._ }, + .{ ._, ._, .cmp, .tmp1b, .sa(.dst0, .add_len), ._, ._ }, .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, - .dst_constraints = .{.{ .bool_vec = .qword }}, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .usize, .kind = .{ .rc = 
.general_purpose } }, - .{ .type = .u8, .kind = .{ .reg = .cl } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .unused, - }, - .dst_temps = .{.{ .rc = .general_purpose }}, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .dst0d, .dst0d, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._, .mov, .tmp2d, .a(.src0p, .add_elem_limbs), ._, ._ }, - .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, - .{ .@"1:", ._, .mov, .tmp4p, .memi(.src0p, .tmp0), ._, ._ }, - .{ ._, ._, .xor, .tmp4p, .memi(.src1p, .tmp0), ._, ._ }, - .{ ._, ._, .@"or", .tmp3p, .tmp4p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp4, .add_size), ._, ._ }, - .{ ._, ._, .dec, .tmp2d, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, - .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ ._, ._, .@"test", .tmp3p, .tmp3p, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp2b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp2q, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .dst0q, .tmp2q, ._, ._ }, - .{ ._, ._, .inc, .tmp1b, ._, ._, ._ }, - .{ ._, ._, .cmp, .tmp1b, .a(.dst0, .add_len), ._, ._ }, - .{ ._, ._b, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .slow_incdec, null }, - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, + .src_constraints = .{ .{ .scalar_int = .byte }, .{ .scalar_int = .byte } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5079,8 +8412,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, @@ -5089,62 +8423,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, - .{ ._, ._, .add, .tmp1d, .i(1), ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, + .{ ._, ._, .add, .tmp1d, .si(1), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ }, .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ }, .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ .@"1:", ._, .add, .tmp0p, .i(1), ._, ._ }, + .{ .@"1:", ._, .add, .tmp0p, .si(1), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, + .{ ._, ._, .@"test", .tmp1d, .sia(-1, .none, .add_ptr_bit_size), ._, ._ }, .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, - .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, - }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .reg = .ecx } }, - .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type 
= .usize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, - .unused, - }, - .dst_temps = .{.mem}, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, - .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, - .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ .@"0:", ._, .xor, .tmp3d, .tmp3d, ._, ._ }, - .{ ._, ._, .mov, .tmp4b, .memia(.src0b, .tmp0, .add_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp4b, .memia(.src1b, .tmp0, .add_size), ._, ._ }, - .{ ._, .fromCondition(cc), .set, .tmp3b, ._, ._, ._ }, - .{ ._, ._l, .sh, .tmp3p, .tmp1b, ._, ._ }, - .{ ._, ._, .@"or", .tmp2p, .tmp3p, ._, ._ }, - .{ ._, ._, .inc, .tmp1d, ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, - .{ ._, ._nz, .j, .@"1f", ._, ._, ._ }, - .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, - .{ ._, ._, .mov, .memia(.dst0p, .tmp3, .sub_ptr_size), .tmp2p, ._, ._ }, - .{ ._, ._, .xor, .tmp2d, .tmp2d, ._, ._ }, - .{ .@"1:", ._, .inc, .tmp0p, ._, ._, ._ }, - .{ ._, ._nz, .j, .@"0b", ._, ._, ._ }, - .{ ._, ._, .@"test", .tmp1d, .ia(-1, .none, .add_ptr_bit_size), ._, ._ }, - .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .tmp3d, .tmp1d, ._, ._ }, - .{ ._, ._r, .sh, .tmp3d, .i(3), ._, ._ }, + .{ ._, ._r, .sh, .tmp3d, .si(3), ._, ._ }, .{ ._, ._, .mov, .memi(.dst0p, .tmp3), .tmp2p, ._, ._ }, } }, } }, - }) catch |err2| switch (err2) { + }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), cg.typeOf(extra.lhs).fmt(pt), @@ -5175,9 +8470,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .cmp_lte, .cmp_lte_optimized => .lte, .cmp_gte, .cmp_gte_optimized => .gte, .cmp_gt, .cmp_gt_optimized => .gt, - }) else { + }) else fallback: { const bin_op = air_datas[@intFromEnum(inst)].bin_op; const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu); + if (scalar_ty.isRuntimeFloat()) break :fallback try cg.airCmp(inst, switch (air_tag) { + else => unreachable, + .cmp_lt, .cmp_lt_optimized => .lt, + .cmp_lte, .cmp_lte_optimized => .lte, + .cmp_gte, .cmp_gte_optimized => .gte, + .cmp_gt, .cmp_gt_optimized => .gt, + }); const signedness = if (scalar_ty.isAbiInt(zcu)) scalar_ty.intInfo(zcu).signedness else @@ -5205,11 +8507,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, @@ -5217,12 +8519,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mem, .imm8 } }, - .{ .src = .{ .gpr, .imm8 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm8 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, @@ -5230,11 
+8532,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, @@ -5242,12 +8544,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .word }, .{ .int = .word } }, .patterns = &.{ .{ .src = .{ .mem, .imm16 } }, - .{ .src = .{ .gpr, .imm16 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm16 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, @@ -5255,11 +8557,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, @@ -5267,45 +8569,50 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword } }, .patterns = &.{ .{ .src = .{ .mem, .imm32 } }, - .{ .src = .{ .gpr, .imm32 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm32 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc.commute() }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .mem, .simm32 } }, - .{ .src = .{ .gpr, .simm32 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .simm32 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = 
.{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, }, .{ + .src_constraints = .{ .any_int, .any_int }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem } }, + .{ .src = .{ .to_mem, .to_mem }, .commute = switch (cc) { + else => unreachable, + .l, .ge, .b, .ae => .{ 0, 0 }, + .le, .g, .be, .a => .{ 0, 1 }, + } }, }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, @@ -5315,17 +8622,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1b, .tmp1b, ._, ._ }, - .{ .@"0:", ._r, .sh, .tmp1b, .i(1), ._, ._ }, + .{ .@"0:", ._r, .sh, .tmp1b, .si(1), ._, ._ }, .{ ._, ._, .mov, .tmp1p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .sbb, .tmp1p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._c, .set, .tmp1b, ._, ._, ._ }, - .{ ._, .fromCondition(cc), .set, .dst0b, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp1, .add_size), ._, ._ }, + .{ ._, .fromCondition(switch (cc) { + else => unreachable, + .l, .ge, .b, .ae => cc, + .le, .g, .be, .a => cc.commute(), + }), .set, .dst0b, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp1, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, } }, @@ -5342,13 +8653,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { if (ops[1].index != res[0].index) try ops[1].die(cg); try res[0].moveTo(inst, cg); }, - .cmp_eq, .cmp_eq_optimized, .cmp_neq, .cmp_neq_optimized => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) { + .cmp_eq, + .cmp_eq_optimized, + .cmp_neq, + .cmp_neq_optimized, + => |air_tag| if (use_old) try cg.airCmp(inst, switch (air_tag) { else => unreachable, .cmp_eq, .cmp_eq_optimized => .eq, .cmp_neq, .cmp_neq_optimized => .neq, }) else fallback: { const bin_op = air_datas[@intFromEnum(inst)].bin_op; - if (ip.isOptionalType(cg.typeOf(bin_op.lhs).toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) { + const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu); + if (scalar_ty.isRuntimeFloat() or ip.isOptionalType(scalar_ty.toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) { else => unreachable, .cmp_eq, .cmp_eq_optimized => .eq, .cmp_neq, .cmp_neq_optimized => .neq, @@ -5362,14 +8678,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .e, .ne => |cc| comptime &.{ .{ - .required_features = .{ .avx2, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, @@ -5379,19 +8694,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ }, .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, 
._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, .patterns = &.{ - .{ .src = .{ .ymm, .mem } }, - .{ .src = .{ .mem, .ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .ymm, .ymm } }, + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, @@ -5401,19 +8716,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ }, .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, .patterns = &.{ - .{ .src = .{ .xmm, .mem } }, - .{ .src = .{ .mem, .xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .xmm, .xmm } }, + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .kind = .{ .rc = .sse } }, .unused, @@ -5423,33 +8738,33 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ }, .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse4_1, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, .patterns = &.{ - .{ .src = .{ .mut_xmm, .mem } }, - .{ .src = .{ .mem, .mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_xmm, .xmm } }, + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .sse } }, @@ -5459,22 +8774,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .i(0xffff), ._, ._ }, + .{ ._, ._, .xor, 
.tmp0d, .si(0xffff), ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, .mmx }, - .src_constraints = .{ .any_int, .any_int }, + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ - .{ .src = .{ .mut_mm, .mem } }, - .{ .src = .{ .mem, .mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mut_mm, .mm } }, + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, - .clobbers = .{ .eflags = true }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .kind = .{ .rc = .mmx } }, @@ -5484,26 +8799,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .i(0xff), ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .si(0xff), ._, ._ }, } }, }, .{ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mem, .imm8 } }, .{ .src = .{ .imm8, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .imm8 } }, - .{ .src = .{ .imm8, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm8 } }, + .{ .src = .{ .imm8, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0b, .src1b, ._, ._ }, } }, @@ -5512,14 +8828,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mem, .imm16 } }, .{ .src = .{ .imm16, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .imm16 } }, - .{ .src = .{ .imm16, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm16 } }, + .{ .src = .{ .imm16, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0w, .src1w, ._, ._ }, } }, @@ -5528,36 +8844,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .patterns = &.{ .{ .src = .{ .mem, .imm32 } }, .{ .src = .{ .imm32, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .imm32 } }, - .{ .src = .{ .imm32, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .imm32 } }, + .{ .src = .{ .imm32, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0d, .src1d, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null }, + 
.required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, .patterns = &.{ .{ .src = .{ .mem, .simm32 } }, .{ .src = .{ .simm32, .mem }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .simm32 } }, - .{ .src = .{ .simm32, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .mem } }, - .{ .src = .{ .mem, .gpr }, .commute = .{ 0, 1 } }, - .{ .src = .{ .gpr, .gpr } }, + .{ .src = .{ .to_gpr, .simm32 } }, + .{ .src = .{ .simm32, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .mem } }, + .{ .src = .{ .mem, .to_gpr }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, }, - .clobbers = .{ .eflags = true }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .avx2, null }, + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .yword, .is = .xword } }, + .{ .remainder_int = .{ .of = .yword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5570,18 +8890,51 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_size), ._, ._ }, + .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .v_dqu, .mov, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ }, + .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ }, + .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .vp_, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .vp_, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .yword, .is = .xword } }, + .{ .remainder_int = .{ .of = .yword, .is = .xword } }, + }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5594,18 +8947,47 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, 
.sub_size), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, + .{ .@"0:", .v_pd, .movu, .tmp2y, .memiad(.src0y, .tmp0, .add_size, -16), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memiad(.src1y, .tmp0, .add_size, -16), ._ }, + .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_pd, .mova, .tmp2x, .memad(.src0x, .add_size, -16), ._, ._ }, + .{ ._, .v_pd, .xor, .tmp2x, .tmp2x, .memad(.src1x, .add_size, -16), ._ }, + .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, + .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .v_pd, .xor, .tmp1y, .tmp1y, .tmp1y, ._ }, .{ .@"0:", .v_pd, .movu, .tmp2y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, .{ ._, .v_pd, .xor, .tmp2y, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._ }, .{ ._, .v_pd, .@"or", .tmp1y, .tmp1y, .tmp2y, ._ }, - .{ ._, ._, .add, .tmp0p, .i(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .vp_, .@"test", .tmp1y, .tmp1y, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null }, + .required_features = .{ .avx, null, null, null }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5618,18 +9000,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .vp_, .xor, .tmp1x, .tmp1x, .tmp1x, ._ }, .{ .@"0:", .v_dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .vp_, .xor, .tmp2x, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._ }, .{ ._, .vp_, .@"or", .tmp1x, .tmp1x, .tmp2x, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .vp_, .@"test", .tmp1x, .tmp1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse4_1, null }, + .required_features = .{ .sse4_1, null, null, null }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5642,18 +9025,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .p_, .@"test", .tmp1x, .tmp1x, ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null }, + .required_features = .{ .sse2, null, null, null }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5666,21 +9050,22 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, .{ .@"0:", ._dqu, .mov, .tmp2x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .xor, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .@"or", .tmp1x, .tmp2x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(16), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp1x, .tmp2x, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, - .{ ._, ._, .cmp, .tmp0d, .i(0xffff), ._, ._ }, + .{ ._, ._, .cmp, .tmp0d, .si(0xffff), ._, ._ }, } }, }, .{ - .required_features = .{ .sse, .mmx }, + .required_features = .{ .sse, .mmx, null, null }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -5693,18 +9078,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, .{ .@"0:", ._q, .mov, .tmp2q, .memia(.src0q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .xor, .tmp2q, .memia(.src1q, .tmp0, .add_size), ._, ._ }, .{ ._, .p_, .@"or", .tmp1q, .tmp2q, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .i(8), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, .p_, .xor, .tmp2q, .tmp2q, ._, ._ }, .{ ._, .p_b, .cmpeq, .tmp1q, .tmp2q, ._, ._ }, .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, - .{ ._, ._, .cmp, .tmp0d, .i(0xff), ._, ._ }, + .{ ._, ._, .cmp, .tmp0d, .si(0xff), ._, ._ }, } }, }, .{ .patterns = &.{ @@ -5719,13 +9105,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .a(.src0, .sub_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, .{ ._, ._, .xor, .tmp1p, .tmp1p, ._, ._ }, .{ .@"0:", ._, .mov, .tmp2p, .memia(.src0p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .xor, .tmp2p, .memia(.src1p, .tmp0, .add_size), ._, ._ }, .{ ._, ._, .@"or", .tmp1p, .tmp2p, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .a(.tmp2, .add_size), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .sa(.tmp2, .add_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, .{ ._, ._, .@"test", .tmp1p, .tmp1p, ._, ._ }, } }, @@ -6453,11 +9840,8 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class { else => .sse, }, .vector => switch (ty.childType(zcu).toIntern()) { - .bool_type, .u1_type => .general_purpose, - else => if (ty.isAbiInt(zcu) and ty.intInfo(zcu).bits == 1) - .general_purpose - else - .sse, + .bool_type => .general_purpose, + else => .sse, }, else => .general_purpose, }; @@ -8383,32 +11767,54 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { const lhs_mcv = try self.resolveInst(bin_op.lhs); const rhs_mcv = try self.resolveInst(bin_op.rhs); - const mat_lhs_mcv = switch (lhs_mcv) { - .load_symbol => mat_lhs_mcv: { + const mat_lhs_mcv = mat_lhs_mcv: switch (lhs_mcv) { + .register => |lhs_reg| switch (lhs_reg.class()) { + else => lhs_mcv, + .sse => { + const mat_lhs_mcv: MCValue = .{ + .register_pair = try 
self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp), + }; + try self.genCopy(dst_ty, mat_lhs_mcv, lhs_mcv, .{}); + break :mat_lhs_mcv mat_lhs_mcv; + }, + }, + .load_symbol => { // TODO clean this up! const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => lhs_mcv, }; - const mat_lhs_lock = switch (mat_lhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, + const mat_lhs_locks: [2]?RegisterLock = switch (mat_lhs_mcv) { + .register_pair => |mat_lhs_regs| self.register_manager.lockRegs(2, mat_lhs_regs), + .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null }, + else => @splat(null), }; - defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); - const mat_rhs_mcv = switch (rhs_mcv) { - .load_symbol => mat_rhs_mcv: { + defer for (mat_lhs_locks) |mat_lhs_lock| if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); + const mat_rhs_mcv = mat_rhs_mcv: switch (rhs_mcv) { + .register => |rhs_reg| switch (rhs_reg.class()) { + else => rhs_mcv, + .sse => { + const mat_rhs_mcv: MCValue = .{ + .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp), + }; + try self.genCopy(dst_ty, mat_rhs_mcv, rhs_mcv, .{}); + break :mat_rhs_mcv mat_rhs_mcv; + }, + }, + .load_symbol => { // TODO clean this up! const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; }, else => rhs_mcv, }; - const mat_rhs_lock = switch (mat_rhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, + const mat_rhs_locks: [2]?RegisterLock = switch (mat_rhs_mcv) { + .register_pair => |mat_rhs_regs| self.register_manager.lockRegs(2, mat_rhs_regs), + .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null }, + else => @splat(null), }; - defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); + defer for (mat_rhs_locks) |mat_rhs_lock| if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( .{ ._, .mov }, @@ -10003,7 +13409,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { } }, }, .u(0)); _ = try self.asmJccReloc(.e, loop); - try self.asmRegisterMemory(.{ ._, .bsr }, dst_reg.to64(), .{ + try self.asmRegisterMemory(.{ ._r, .bs }, dst_reg.to64(), .{ .base = .{ .frame = src_frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, @@ -10080,8 +13486,8 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(wide_lock); try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._, .bsr }, .u16, dst_mcv, .{ .register = wide_reg }); - } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._r, .bs }, .u16, dst_mcv, .{ .register = wide_reg }); + } else try self.genBinOpMir(.{ ._r, .bs }, src_ty, dst_mcv, mat_src_mcv); try self.asmCmovccRegisterRegister( .z, @@ -10103,7 +13509,7 @@ fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { try self.truncateRegister(src_ty, wide_reg); try self.genBinOpMir( - .{ ._, .bsr }, + .{ ._r, .bs }, if (src_bits <= 8) .u16 else src_ty, dst_mcv, .{ .register = wide_reg }, @@ -10200,7 +13606,7 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { } }, }, .u(0)); _ = try self.asmJccReloc(.e, loop); - try self.asmRegisterMemory(.{ ._, .bsf }, dst_reg.to64(), .{ + try 
self.asmRegisterMemory(.{ ._f, .bs }, dst_reg.to64(), .{ .base = .{ .frame = src_frame_addr.index }, .mod = .{ .rm = .{ .size = .qword, @@ -10280,8 +13686,8 @@ fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(wide_lock); try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._, .bsf }, wide_ty, dst_mcv, .{ .register = wide_reg }); - } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._f, .bs }, wide_ty, dst_mcv, .{ .register = wide_reg }); + } else try self.genBinOpMir(.{ ._f, .bs }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2); try self.asmCmovccRegisterRegister( @@ -12975,7 +16381,18 @@ fn genShiftBinOp( const rcx_lock = self.register_manager.lockReg(.rcx); defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock); - const lhs_lock = switch (lhs_mcv) { + const mat_lhs_mcv: MCValue, const can_reuse_lhs = switch (lhs_mcv) { + .register => |lhs_reg| switch (lhs_reg.class()) { + .general_purpose => .{ lhs_mcv, true }, + else => lhs: { + const mat_lhs_mcv = try self.allocTempRegOrMem(lhs_ty, true); + try self.genCopy(lhs_ty, mat_lhs_mcv, lhs_mcv, .{}); + break :lhs .{ mat_lhs_mcv, false }; + }, + }, + else => .{ lhs_mcv, true }, + }; + const lhs_lock = switch (mat_lhs_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, }; @@ -12988,12 +16405,12 @@ fn genShiftBinOp( defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); const dst_mcv: MCValue = dst: { - if (maybe_inst) |inst| { + if (can_reuse_lhs) if (maybe_inst) |inst| { const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv; - } + if (self.reuseOperand(inst, bin_op.lhs, 0, mat_lhs_mcv)) break :dst mat_lhs_mcv; + }; const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true); - try self.genCopy(lhs_ty, dst_mcv, lhs_mcv, .{}); + try self.genCopy(lhs_ty, dst_mcv, mat_lhs_mcv, .{}); break :dst dst_mcv; }; @@ -18337,12 +21754,28 @@ const MoveStrategy = union(enum) { try self.asmRegister(.{ .f_, .ld }, src_reg); try self.asmMemory(.{ .f_p, .st }, dst_mem); }, - .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate( - ie.extract, - dst_mem, - src_reg, - .u(0), - ), + .insert_extract, .vex_insert_extract => |ie| if (ie.extract[0] != .p_w or self.hasFeature(.sse4_1)) + try self.asmMemoryRegisterImmediate(ie.extract, dst_mem, src_reg, .u(0)) + else if (self.hasFeature(.sse2)) { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + try self.asmRegisterRegisterImmediate(ie.extract, tmp_reg.to32(), src_reg.to128(), .u(0)); + try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16()); + } else { + const tmp_frame_index = try self.allocFrameIndex(.init(.{ + .size = 16, + .alignment = .@"16", + })); + try self.asmMemoryRegister(.{ ._ps, .mova }, .{ + .base = .{ .frame = tmp_frame_index }, + .mod = .{ .rm = .{ .size = .xword } }, + }, src_reg.to128()); + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + try self.asmRegisterMemory(.{ ._, .mov }, tmp_reg.to16(), .{ + .base = .{ .frame = tmp_frame_index }, + .mod = .{ .rm = .{ .size = .word } }, + }); + try self.asmMemoryRegister(.{ ._, .mov }, dst_mem, tmp_reg.to16()); + }, } } }; @@ -18400,8 +21833,10 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) .{ ._ss, .mov } 
}, 5...8 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } + else if (self.hasFeature(.sse2)) + .{ ._sd, .mov } else - .{ ._sd, .mov } }, + .{ ._ps, .movl } }, 9...16 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_pd, if (aligned) .mova else .movu } else if (self.hasFeature(.sse2)) @@ -18427,8 +21862,10 @@ fn moveStrategy(self: *CodeGen, ty: Type, class: Register.Class, aligned: bool) .{ ._ss, .mov } }, 64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } + else if (self.hasFeature(.sse2)) + .{ ._sd, .mov } else - .{ ._sd, .mov } }, + .{ ._ps, .movl } }, 128 => return .{ .move = if (self.hasFeature(.avx)) .{ if (aligned) .v_dqa else .v_dqu, .mov } else if (self.hasFeature(.sse2)) @@ -18623,6 +22060,30 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C }, opts), inline .register_pair, .register_triple, .register_quadruple => |dst_regs| { const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = switch (src_mcv) { + .register => |src_reg| switch (dst_regs[0].class()) { + .general_purpose => switch (src_reg.class()) { + else => unreachable, + .sse => if (ty.abiSize(pt.zcu) <= 16) { + if (self.hasFeature(.avx)) { + try self.asmRegisterRegister(.{ .v_q, .mov }, dst_regs[0].to64(), src_reg.to128()); + try self.asmRegisterRegisterImmediate(.{ .vp_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1)); + } else if (self.hasFeature(.sse4_1)) { + try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128()); + try self.asmRegisterRegisterImmediate(.{ .p_q, .extr }, dst_regs[1].to64(), src_reg.to128(), .u(1)); + } else { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128()); + try self.asmRegisterRegister(.{ ._ps, .movhl }, tmp_reg.to128(), src_reg.to128()); + try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), src_reg.to128()); + } + return; + } else unreachable, + }, + else => unreachable, + }, .register_pair, .memory, .indirect, .load_frame => null, .load_symbol, .load_direct, .load_got, .load_tlv => src: { const src_addr_reg = @@ -18863,7 +22324,39 @@ fn genSetReg( inline .register_pair, .register_triple, .register_quadruple, - => |src_regs| try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts), + => |src_regs| switch (dst_reg.class()) { + .general_purpose => switch (src_regs[0].class()) { + .general_purpose => try self.genSetReg(dst_reg, ty, .{ .register = src_regs[0] }, opts), + else => unreachable, + }, + .sse => switch (src_regs[0].class()) { + .general_purpose => if (abi_size <= 16) { + if (self.hasFeature(.avx)) { + try self.asmRegisterRegister(.{ .v_q, .mov }, dst_reg.to128(), src_regs[0].to64()); + try self.asmRegisterRegisterRegisterImmediate( + .{ .vp_q, .insr }, + dst_reg.to128(), + dst_reg.to128(), + src_regs[1].to64(), + .u(1), + ); + } else if (self.hasFeature(.sse4_1)) { + try self.asmRegisterRegister(.{ ._q, .mov }, dst_reg.to128(), src_regs[0].to64()); + try self.asmRegisterRegisterImmediate(.{ .p_q, .insr }, dst_reg.to128(), src_regs[1].to64(), .u(1)); + } else { + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterRegister(.{ ._q, .mov }, 
dst_reg.to128(), src_regs[0].to64()); + try self.asmRegisterRegister(.{ ._q, .mov }, tmp_reg.to128(), src_regs[1].to64()); + try self.asmRegisterRegister(.{ ._ps, .movlh }, dst_reg.to128(), tmp_reg.to128()); + } + } else unreachable, + else => unreachable, + }, + else => unreachable, + }, .register_offset, .indirect, .load_frame, @@ -23517,8 +27010,6 @@ fn promoteVarArg(self: *CodeGen, ty: Type) Type { } } -// ====================================== rewrite starts here ====================================== - const Temp = struct { index: Air.Inst.Index, @@ -24311,13 +27802,13 @@ const Select = struct { } const Case = struct { - required_features: [2]?std.Target.x86.Feature = @splat(null), + required_features: [4]?std.Target.x86.Feature = @splat(null), dst_constraints: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]Constraint = @splat(.any), src_constraints: [@intFromEnum(Select.Operand.Ref.none) - @intFromEnum(Select.Operand.Ref.src0)]Constraint = @splat(.any), patterns: []const Select.Pattern, - clobbers: struct { eflags: bool = false } = .{}, extra_temps: [@intFromEnum(Select.Operand.Ref.dst0) - @intFromEnum(Select.Operand.Ref.tmp0)]TempSpec = @splat(.unused), dst_temps: [@intFromEnum(Select.Operand.Ref.src0) - @intFromEnum(Select.Operand.Ref.dst0)]TempSpec.Kind = @splat(.unused), + clobbers: struct { eflags: bool = false } = .{}, each: union(enum) { once: []const Instruction, }, @@ -24327,9 +27818,32 @@ const Select = struct { any, any_bool_vec, any_int, + any_signed_int, any_float, bool_vec: Memory.Size, + vec: Memory.Size, + signed_int_vec: Memory.Size, + signed_int_or_full_vec: Memory.Size, + unsigned_int_vec: Memory.Size, + int_or_vec: Memory.Size, + exact_remainder_int_or_vec: struct { of: Memory.Size, is: Memory.Size }, int: Memory.Size, + scalar_int: Memory.Size, + scalar_signed_int: Memory.Size, + scalar_unsigned_int: Memory.Size, + scalar_remainder_int: struct { of: Memory.Size, is: Memory.Size }, + exact_int: u16, + exact_signed_int: u16, + exact_unsigned_int: u16, + signed_or_exact_int: Memory.Size, + unsigned_or_exact_int: Memory.Size, + po2_int: Memory.Size, + signed_po2_int: Memory.Size, + unsigned_po2_or_exact_int: Memory.Size, + remainder_int: struct { of: Memory.Size, is: Memory.Size }, + exact_remainder_int: struct { of: Memory.Size, is: Memory.Size }, + signed_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size }, + unsigned_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size }, signed_int: Memory.Size, unsigned_int: Memory.Size, @@ -24338,30 +27852,183 @@ const Select = struct { switch (constraint) { .any => return true, .any_bool_vec => return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type, - .any_int => { - const scalar_ty = ty.scalarType(zcu); - return scalar_ty.isAbiInt(zcu) or scalar_ty.isPtrAtRuntime(zcu); - }, + .any_int => return ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu) or ty.isAbiInt(zcu), + .any_signed_int => return ty.isAbiInt(zcu) and ty.intInfo(zcu).signedness == .signed, .any_float => return ty.scalarType(zcu).isRuntimeFloat(), - .bool_vec => |size| return ty.isVector(zcu) and - ty.scalarType(zcu).toIntern() == .bool_type and ty.vectorLen(zcu) <= size.bitSize(cg.target), - .int => |size| { + .bool_vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and + size.bitSize(cg.target) >= ty.vectorLen(zcu), + .vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() != .bool_type and + size.bitSize(cg.target) >= 
ty.abiSize(zcu), + .signed_int_vec => |size| { + if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false; const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target); - return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).bits <= size.bitSize(cg.target); + return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .signed; + }, + .signed_int_or_full_vec => |size| { + if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false; + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return true; + if (!scalar_ty.isAbiInt(zcu)) return false; + const scalar_int_info = scalar_ty.intInfo(zcu); + return switch (scalar_int_info.signedness) { + .signed => true, + .unsigned => scalar_int_info.bits >= 8 and std.math.isPowerOfTwo(scalar_int_info.bits), + }; + }, + .unsigned_int_vec => |size| { + if (!ty.isVector(zcu) or size.bitSize(cg.target) < ty.bitSize(zcu)) return false; + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return true; + return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .unsigned; + }, + .int_or_vec => |size| { + if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and + size.bitSize(cg.target) >= 8 * ty.abiSize(zcu); + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits; + }, + .exact_remainder_int_or_vec => |of_is| { + if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and + of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1; + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; + }, + .int => |size| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits; + }, + .scalar_int => |size| { + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + return scalar_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= scalar_ty.intInfo(zcu).bits; + }, + .scalar_signed_int => |size| { + const scalar_ty = ty.scalarType(zcu); + if (!scalar_ty.isAbiInt(zcu)) return false; + const scalar_int_info = scalar_ty.intInfo(zcu); + return scalar_int_info.signedness == .signed and size.bitSize(cg.target) >= scalar_int_info.bits; + }, + .scalar_unsigned_int => |size| { + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + if (!scalar_ty.isAbiInt(zcu)) return false; + const scalar_int_info = scalar_ty.intInfo(zcu); + return scalar_int_info.signedness == .unsigned and size.bitSize(cg.target) >= scalar_int_info.bits; + }, + .scalar_remainder_int => |of_is| { + const scalar_ty = ty.scalarType(zcu); + if (scalar_ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!scalar_ty.isAbiInt(zcu)) return false; + return of_is.is.bitSize(cg.target) >= 
(scalar_ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; + }, + .exact_int => |bit_size| { + if (ty.toIntern() == .bool_type) return bit_size == 1; + if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth(); + return ty.isAbiInt(zcu) and bit_size == ty.intInfo(zcu).bits; + }, + .exact_signed_int => |bit_size| { + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .signed and bit_size == int_info.bits; + }, + .exact_unsigned_int => |bit_size| { + if (ty.toIntern() == .bool_type) return bit_size == 1; + if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .unsigned and bit_size == int_info.bits; + }, + .signed_or_exact_int => |size| { + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) == cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => size.bitSize(cg.target) >= int_info.bits, + .unsigned => size.bitSize(cg.target) == int_info.bits, + }; + }, + .unsigned_or_exact_int => |size| { + if (ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu)) return true; + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => size.bitSize(cg.target) == int_info.bits, + .unsigned => size.bitSize(cg.target) >= int_info.bits, + }; + }, + .po2_int => |size| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const bit_size = ty.intInfo(zcu).bits; + return std.math.isPowerOfTwo(bit_size) and size.bitSize(cg.target) >= bit_size; + }, + .signed_po2_int => |size| { + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .signed and std.math.isPowerOfTwo(int_info.bits) and + size.bitSize(cg.target) >= int_info.bits; + }, + .unsigned_po2_or_exact_int => |size| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => size.bitSize(cg.target) == int_info.bits, + .unsigned => std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits, + }; + }, + .remainder_int => |of_is| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + return of_is.is.bitSize(cg.target) >= (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; + }, + .exact_remainder_int => |of_is| { + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; + }, + .signed_or_exact_remainder_int => |of_is| { + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => 
of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + .unsigned => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + }; + }, + .unsigned_or_exact_remainder_int => |of_is| { + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) + return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return switch (int_info.signedness) { + .signed => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + .unsigned => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + }; }, .signed_int => |size| { - const scalar_ty = ty.scalarType(zcu); - if (!scalar_ty.isAbiInt(zcu)) return false; - const info = scalar_ty.intInfo(zcu); - return info.signedness == .signed and info.bits <= size.bitSize(cg.target); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .signed and size.bitSize(cg.target) >= int_info.bits; }, .unsigned_int => |size| { - const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return cg.target.ptrBitWidth() <= size.bitSize(cg.target); - if (!scalar_ty.isAbiInt(zcu)) return false; - const info = scalar_ty.intInfo(zcu); - return info.signedness == .unsigned and info.bits <= size.bitSize(cg.target); + if (ty.toIntern() == .bool_type) return true; + if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + if (!ty.isAbiInt(zcu)) return false; + const int_info = ty.intInfo(zcu); + return int_info.signedness == .unsigned and size.bitSize(cg.target) >= int_info.bits; }, } } @@ -24379,97 +28046,107 @@ const Select = struct { imm32, simm32, mem, - mut_mem, to_mem, + mut_mem, + to_mut_mem, gpr, + to_gpr, mut_gpr, + to_mut_gpr, mm, + to_mm, mut_mm, + to_mut_mm, xmm, + to_xmm, mut_xmm, + to_mut_xmm, ymm, + to_ymm, mut_ymm, + to_mut_ymm, fn matches(src: Src, temp: Temp, cg: *CodeGen) bool { - switch (src) { + return switch (src) { .none => unreachable, - .any => return true, - .imm8 => return switch (temp.tracking(cg).short) { + .any => true, + .imm8 => switch (temp.tracking(cg).short) { .immediate => |imm| std.math.cast(u8, imm) != null, else => false, }, - .imm16 => return switch (temp.tracking(cg).short) { + .imm16 => switch (temp.tracking(cg).short) { .immediate => |imm| std.math.cast(u16, imm) != null, else => false, }, - .imm32 => return switch (temp.tracking(cg).short) { + .imm32 => switch (temp.tracking(cg).short) { .immediate => |imm| std.math.cast(u32, imm) != null, else => false, }, - .simm32 => return switch (temp.tracking(cg).short) { + .simm32 => switch (temp.tracking(cg).short) { .immediate => |imm| std.math.cast(i32, @as(i64, @bitCast(imm))) != null, else => false, }, - .mem => return temp.tracking(cg).short.isMemory(), - .mut_mem => return temp.isMut(cg) and temp.tracking(cg).short.isMemory(), - .to_mem => return true, - .gpr, .mut_gpr => { - const mcv = temp.tracking(cg).short; - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return abi_size <= 8 and switch (mcv) { - .register => |reg| reg.class() == .general_purpose, - .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and - reg_off.off == 0, - .register_pair, .register_triple, .register_quadruple => false, - else => true, - }; + .mem => temp.tracking(cg).short.isMemory(), + .to_mem, .to_mut_mem => true, + .mut_mem => 
temp.isMut(cg) and temp.tracking(cg).short.isMemory(), + .gpr => temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .general_purpose, + .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0, + else => false, }, - .mm, .mut_mm => { - const mcv = temp.tracking(cg).short; - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return abi_size <= 8 and switch (mcv) { - .register => |reg| reg.class() == .mmx, - .register_offset => |reg_off| reg_off.reg.class() == .mmx and - reg_off.off == 0, - else => false, - }; + .mut_gpr => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .general_purpose, + .register_offset => |reg_off| reg_off.reg.class() == .general_purpose and reg_off.off == 0, + else => false, }, - .xmm, .mut_xmm => { - const mcv = temp.tracking(cg).short; - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return abi_size > 8 and abi_size <= 16 and switch (mcv) { - .register => |reg| reg.class() == .sse, - .register_offset => |reg_off| reg_off.reg.class() == .sse and - reg_off.off == 0, - .register_pair, .register_triple, .register_quadruple => false, - else => true, - }; + .to_gpr, .to_mut_gpr => temp.typeOf(cg).abiSize(cg.pt.zcu) <= 8, + .mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .mmx, + .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0, + else => false, }, - .ymm, .mut_ymm => { - const mcv = temp.tracking(cg).short; - const abi_size = temp.typeOf(cg).abiSize(cg.pt.zcu); - return abi_size > 16 and abi_size <= 32 and switch (mcv) { - .register => |reg| reg.class() == .sse, - .register_offset => |reg_off| reg_off.reg.class() == .sse and - reg_off.off == 0, - .register_pair, .register_triple, .register_quadruple => false, - else => true, - }; + .mut_mm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 8 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .mmx, + .register_offset => |reg_off| reg_off.reg.class() == .mmx and reg_off.off == 0, + else => false, }, - } + .to_mm, .to_mut_mm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 8, + .xmm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => false, + }, + .mut_xmm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 16 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => false, + }, + .to_xmm, .to_mut_xmm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 16, + .ymm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => false, + }, + .mut_ymm => temp.isMut(cg) and temp.typeOf(cg).abiSize(cg.pt.zcu) == 32 and switch (temp.tracking(cg).short) { + .register => |reg| reg.class() == .sse, + .register_offset => |reg_off| reg_off.reg.class() == .sse and reg_off.off == 0, + else => false, + }, + .to_ymm, .to_mut_ymm => temp.typeOf(cg).abiSize(cg.pt.zcu) == 32, + }; } fn convert(src: Src, temp: *Temp, cg: *CodeGen) !bool { return switch (src) { .none => unreachable, .any, .imm8, .imm16, 
.imm32, .simm32 => false, - .mem, .mut_mem, .to_mem => try temp.toBase(cg), - .gpr => try temp.toRegClass(false, .general_purpose, cg), - .mut_gpr => try temp.toRegClass(true, .general_purpose, cg), - .mm => try temp.toRegClass(false, .mmx, cg), - .mut_mm => try temp.toRegClass(true, .mmx, cg), - .xmm, .ymm => try temp.toRegClass(false, .sse, cg), - .mut_xmm, .mut_ymm => try temp.toRegClass(true, .sse, cg), + .mem, .to_mem, .mut_mem, .to_mut_mem => try temp.toBase(cg), + .gpr, .to_gpr => try temp.toRegClass(false, .general_purpose, cg), + .mut_gpr, .to_mut_gpr => try temp.toRegClass(true, .general_purpose, cg), + .mm, .to_mm => try temp.toRegClass(false, .mmx, cg), + .mut_mm, .to_mut_mm => try temp.toRegClass(true, .mmx, cg), + .xmm, .to_xmm, .ymm, .to_ymm => try temp.toRegClass(false, .sse, cg), + .mut_xmm, .to_mut_xmm, .mut_ymm, .to_mut_ymm => try temp.toRegClass(true, .sse, cg), }; } }; @@ -24489,6 +28166,10 @@ const Select = struct { rc: Register.Class, rc_mask: struct { rc: Register.Class, info: MaskInfo }, mem, + smin_mem: Select.Operand.Ref, + smax_mem: Select.Operand.Ref, + umin_mem: Select.Operand.Ref, + umax_mem: Select.Operand.Ref, ref: Select.Operand.Ref, ref_mask: struct { ref: Select.Operand.Ref, info: MaskInfo }, @@ -24501,14 +28182,81 @@ const Select = struct { }; fn create(spec: TempSpec, s: *Select) !?Temp { + const cg = s.cg; return switch (spec.kind) { .unused => null, - .any => try s.cg.tempAlloc(spec.type), - .cc => |cc| try s.cg.tempFromValue(spec.type, .{ .eflags = cc }), - .reg => |reg| try s.cg.tempFromValue(spec.type, .{ .register = reg }), - .rc => |rc| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc)), - .rc_mask => |rc_mask| try s.cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)), - .mem => try s.cg.tempAllocMem(spec.type), + .any => try cg.tempAlloc(spec.type), + .cc => |cc| try cg.tempFromValue(spec.type, .{ .eflags = cc }), + .reg => |reg| try cg.tempFromValue(spec.type, .{ .register = reg }), + .rc => |rc| try cg.tempAllocReg(spec.type, regSetForRegClass(rc)), + .rc_mask => |rc_mask| try cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)), + .mem => try cg.tempAllocMem(spec.type), + .smin_mem, .smax_mem, .umin_mem, .umax_mem => |ty_ref| { + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const ty = ty_ref.deref(s).typeOf(s.cg); + const vector_len, const scalar_ty: Type = switch (ip.indexToKey(ty.toIntern())) { + else => .{ null, ty }, + .vector_type => |vector_type| .{ vector_type.len, .fromInterned(vector_type.child) }, + }; + const res_scalar_ty, const res_scalar_val: Value = res_scalar: switch (scalar_ty.toIntern()) { + .bool_type => .{ + scalar_ty, + .fromInterned(switch (spec.kind) { + else => unreachable, + .smin_mem, .umax_mem => .bool_true, + .smax_mem, .umin_mem => .bool_false, + }), + }, + else => { + const scalar_info: InternPool.Key.IntType = if (scalar_ty.isAbiInt(zcu)) + scalar_ty.intInfo(zcu) + else + .{ .signedness = .unsigned, .bits = @intCast(scalar_ty.bitSize(zcu)) }; + const scalar_int_ty = try pt.intType(scalar_info.signedness, scalar_info.bits); + if (scalar_info.bits <= 64) { + const int_val: i64 = switch (spec.kind) { + else => unreachable, + .smin_mem => std.math.minInt(i64), + .smax_mem => std.math.maxInt(i64), + .umin_mem => 0, + .umax_mem => -1, + }; + const shift: u6 = @intCast(64 - scalar_info.bits); + break :res_scalar .{ scalar_int_ty, switch (scalar_info.signedness) { + .signed => try pt.intValue_i64(scalar_int_ty, int_val >> shift), + .unsigned => try 
pt.intValue_u64(scalar_int_ty, @as(u64, @bitCast(int_val)) >> shift), + } }; + } + var big_int: std.math.big.int.Managed = try .init(cg.gpa); + defer big_int.deinit(); + try big_int.setTwosCompIntLimit(switch (spec.kind) { + else => unreachable, + .smin_mem, .umin_mem => .min, + .smax_mem, .umax_mem => .max, + }, switch (spec.kind) { + else => unreachable, + .smin_mem, .smax_mem => .signed, + .umin_mem, .umax_mem => .unsigned, + }, scalar_info.bits); + try big_int.truncate(&big_int, scalar_info.signedness, scalar_info.bits); + break :res_scalar .{ scalar_int_ty, try pt.intValue_big(scalar_int_ty, big_int.toConst()) }; + }, + }; + const res_ty, const res_val: Value = if (vector_len) |len| res: { + const vector_ty = try pt.vectorType(.{ + .len = len, + .child = res_scalar_ty.toIntern(), + }); + const vector_val = try pt.intern(.{ .aggregate = .{ + .ty = vector_ty.toIntern(), + .storage = .{ .repeated_elem = res_scalar_val.toIntern() }, + } }); + break :res .{ vector_ty, .fromInterned(vector_val) }; + } else .{ res_scalar_ty, res_scalar_val }; + return try cg.tempFromValue(res_ty, try cg.genTypedValue(res_val)); + }, .ref => |ref| ref.deref(s), .ref_mask => |ref_mask| ref_mask.ref.deref(s), }; @@ -24541,21 +28289,51 @@ const Select = struct { forward_label, ref, simm, + uimm, lea, mem, }; - const Adjust = enum { - none, - add_ptr_size, - sub_ptr_size, - add_ptr_bit_size, - sub_ptr_bit_size, - add_size, - sub_size, - add_len, - sub_len, - add_elem_limbs, - sub_elem_limbs, + const Adjust = packed struct(u8) { + factor: i2, + scale: Memory.Scale, + amount: enum(u4) { + none, + ptr_size, + ptr_bit_size, + size, + src0_size, + bit_size, + src0_bit_size, + len, + elem_limbs, + src0_elem_size, + smin, + smax, + umax, + }, + + const none: Adjust = .{ .factor = 0, .scale = .@"1", .amount = .none }; + const sub_ptr_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .ptr_size }; + const add_ptr_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .ptr_bit_size }; + const add_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .size }; + const sub_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .size }; + const add_src0_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_size }; + const sub_src0_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_size }; + const add_2_bit_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .bit_size }; + const add_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .bit_size }; + const sub_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .bit_size }; + const add_src0_bit_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_bit_size }; + const sub_src0_bit_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_bit_size }; + const add_8_len: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .len }; + const add_4_len: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .len }; + const add_3_len: Adjust = .{ .factor = 1, .scale = .@"3", .amount = .len }; + const add_2_len: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .len }; + const add_len: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .len }; + const sub_len: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .len }; + const add_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size }; + const sub_src0_elem_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size }; + const add_elem_limbs: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .elem_limbs }; + const 
add_umax: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .umax }; }; const Ref = enum(u4) { tmp0, @@ -24741,15 +28519,24 @@ const Select = struct { const src1x: Select.Operand = .{ .tag = .ref, .base = .src1x }; const src1y: Select.Operand = .{ .tag = .ref, .base = .src1y }; - fn i(imm: i32) Select.Operand { + fn si(imm: i32) Select.Operand { return .{ .tag = .simm, .imm = imm }; } - fn a(base: Ref.Sized, adjust: Adjust) Select.Operand { + fn sa(base: Ref.Sized, adjust: Adjust) Select.Operand { return .{ .tag = .simm, .base = base, .adjust = adjust }; } - fn ia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand { + fn sia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand { return .{ .tag = .simm, .base = base, .adjust = adjust, .imm = imm }; } + fn ui(imm: i32) Select.Operand { + return .{ .tag = .uimm, .imm = imm }; + } + fn ua(base: Ref.Sized, adjust: Adjust) Select.Operand { + return .{ .tag = .uimm, .base = base, .adjust = adjust }; + } + fn uia(imm: i32, base: Ref.Sized, adjust: Adjust) Select.Operand { + return .{ .tag = .uimm, .base = base, .adjust = adjust, .imm = imm }; + } fn lea(size: Memory.Size, base: Ref) Select.Operand { return .{ @@ -24757,6 +28544,13 @@ const Select = struct { .base = .{ .ref = base, .size = size }, }; } + fn leaa(size: Memory.Size, base: Ref, adjust: Adjust) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .adjust = adjust, + }; + } fn lead(size: Memory.Size, base: Ref, disp: i32) Select.Operand { return .{ .tag = .lea, @@ -24768,14 +28562,22 @@ const Select = struct { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = .@"1" }, + .index = .{ .ref = index, .scale = .@"1" }, + }; + } + fn leaia(size: Memory.Size, base: Ref, index: Ref, adjust: Adjust) Select.Operand { + return .{ + .tag = .lea, + .base = .{ .ref = base, .size = size }, + .index = .{ .ref = index, .scale = .@"1" }, + .adjust = adjust, }; } fn leaid(size: Memory.Size, base: Ref, index: Ref, disp: i32) Select.Operand { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = .@"1" }, + .index = .{ .ref = index, .scale = .@"1" }, .imm = disp, }; } @@ -24783,22 +28585,22 @@ const Select = struct { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = scale }, + .index = .{ .ref = index, .scale = scale }, }; } fn leasid(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = scale }, + .index = .{ .ref = index, .scale = scale }, .imm = disp, }; } - fn leasida(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand { + fn leasiad(size: Memory.Size, base: Ref, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand { return .{ .tag = .lea, .base = .{ .ref = base, .size = size }, - .index_ = .{ .ref = index, .scale = scale }, + .index = .{ .ref = index, .scale = scale }, .adjust = adjust, .imm = disp, }; @@ -24817,6 +28619,21 @@ const Select = struct { .imm = disp, }; } + fn mema(base: Ref.Sized, adjust: Adjust) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .adjust = adjust, + }; + } + fn memad(base: Ref.Sized, adjust: Adjust, disp: i32) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .adjust = adjust, + .imm = disp, + }; + } fn memi(base: Ref.Sized, index: Ref) 
Select.Operand { return .{ .tag = .mem, @@ -24832,6 +28649,15 @@ const Select = struct { .adjust = adjust, }; } + fn memiad(base: Ref.Sized, index: Ref, adjust: Adjust, disp: i32) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = .@"1" }, + .adjust = adjust, + .imm = disp, + }; + } fn memid(base: Ref.Sized, index: Ref, disp: i32) Select.Operand { return .{ .tag = .mem, @@ -24847,6 +28673,14 @@ const Select = struct { .index = .{ .ref = index, .scale = scale }, }; } + fn memsia(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust) Select.Operand { + return .{ + .tag = .mem, + .base = base, + .index = .{ .ref = index, .scale = scale }, + .adjust = adjust, + }; + } fn memsid(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32) Select.Operand { return .{ .tag = .mem, @@ -24855,7 +28689,7 @@ const Select = struct { .imm = disp, }; } - fn memsida(base: Ref.Sized, scale: Memory.Scale, index: Ref, disp: i32, adjust: Adjust) Select.Operand { + fn memsiad(base: Ref.Sized, scale: Memory.Scale, index: Ref, adjust: Adjust, disp: i32) Select.Operand { return .{ .tag = .mem, .base = base, @@ -24865,26 +28699,34 @@ const Select = struct { }; } - fn adjustedImm(op: Select.Operand, s: *const Select) i32 { - return switch (op.adjust) { - .none => op.imm, - .add_ptr_size => op.imm + @divExact(s.cg.target.ptrBitWidth(), 8), - .sub_ptr_size => op.imm - @divExact(s.cg.target.ptrBitWidth(), 8), - .add_ptr_bit_size => op.imm + s.cg.target.ptrBitWidth(), - .sub_ptr_bit_size => op.imm - s.cg.target.ptrBitWidth(), - .add_size => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))), - .sub_size => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu))), - .add_len => op.imm + @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))), - .sub_len => op.imm - @as(i32, @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu))), - .add_elem_limbs => op.imm + @as(i32, @intCast(@divExact( + fn adjustedImm(op: Select.Operand, comptime SignedImm: type, s: *const Select) SignedImm { + const UnsignedImm = @Type(.{ + .int = .{ .signedness = .unsigned, .bits = @typeInfo(SignedImm).int.bits }, + }); + return op.imm + @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) { + .none => 0, + .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8), + .ptr_bit_size => s.cg.target.ptrBitWidth(), + .size => @intCast(op.base.ref.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)), + .src0_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).abiSize(s.cg.pt.zcu)), + .bit_size => @intCast(op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)), + .src0_bit_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu)), + .len => @intCast(op.base.ref.deref(s).typeOf(s.cg).vectorLen(s.cg.pt.zcu)), + .elem_limbs => @intCast(@divExact( op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), @divExact(op.base.size.bitSize(s.cg.target), 8), - ))), - .sub_elem_limbs => op.imm - @as(i32, @intCast(@divExact( - op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), - @divExact(op.base.size.bitSize(s.cg.target), 8), - ))), - }; + )), + .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), + .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate( + 
-%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), + ), + .smax => @as(SignedImm, std.math.maxInt(SignedImm)) >> @truncate( + -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), + ), + .umax => @bitCast(@as(UnsignedImm, std.math.maxInt(UnsignedImm)) >> @truncate( + -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), + )), + }); } fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand { @@ -24907,7 +28749,8 @@ const Select = struct { else => |mcv| .{ .mem = try mcv.mem(s.cg, .{ .size = op.base.size }) }, .register => |reg| .{ .reg = registerAlias(reg, @intCast(@divExact(op.base.size.bitSize(s.cg.target), 8))) }, }, - .simm => .{ .imm = .s(op.adjustedImm(s)) }, + .simm => .{ .imm = .s(op.adjustedImm(i32, s)) }, + .uimm => .{ .imm = .u(@bitCast(op.adjustedImm(i64, s))) }, .lea => .{ .mem = .{ .base = .{ .reg = registerAlias(op.base.ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)) }, .mod = .{ .rm = .{ @@ -24917,7 +28760,7 @@ const Select = struct { .none => .none, }, .scale = op.index.scale, - .disp = op.adjustedImm(s), + .disp = op.adjustedImm(i32, s), } }, } }, .mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{ @@ -24927,7 +28770,7 @@ const Select = struct { .none => .none, }, .scale = op.index.scale, - .disp = op.adjustedImm(s), + .disp = op.adjustedImm(i32, s), }) }, }; } @@ -24942,14 +28785,23 @@ fn select( ) !void { cases: for (cases) |case| { for (case.required_features) |required_feature| if (required_feature) |feature| if (!switch (feature) { - .@"64bit" => cg.target.ptrBitWidth() == 64, + .@"64bit" => switch (cg.target.cpu.arch) { + else => unreachable, + .x86 => false, + .x86_64 => true, + }, .mmx => false, else => cg.hasFeature(feature), }) continue :cases; for (case.dst_constraints[0..dst_temps.len], dst_tys) |dst_constraint, dst_ty| if (!dst_constraint.accepts(dst_ty, cg)) continue :cases; for (case.src_constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp.typeOf(cg), cg)) continue :cases; + if (std.debug.runtime_safety) { + for (case.dst_constraints[dst_temps.len..]) |dst_constraint| assert(dst_constraint == .any); + for (case.src_constraints[src_temps.len..]) |src_constraint| assert(src_constraint == .any); + } patterns: for (case.patterns) |pattern| { - for (pattern.src, src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns; + for (pattern.src[0..src_temps.len], src_temps) |src_pattern, src_temp| if (!src_pattern.matches(src_temp, cg)) continue :patterns; + if (std.debug.runtime_safety) for (pattern.src[src_temps.len..]) |src_pattern| assert(src_pattern == .none); var s: Select = .{ .cg = cg, @@ -24960,9 +28812,11 @@ fn select( const dst_slots = s.temps[@intFromEnum(Select.Operand.Ref.dst0)..@intFromEnum(Select.Operand.Ref.src0)]; const src_slots = s.temps[@intFromEnum(Select.Operand.Ref.src0)..@intFromEnum(Select.Operand.Ref.none)]; + @memcpy(src_slots[0..src_temps.len], src_temps); + std.mem.swap(Temp, &src_slots[pattern.commute[0]], &src_slots[pattern.commute[1]]); for (tmp_slots, case.extra_temps) |*slot, spec| slot.* = try spec.create(&s) orelse continue; - while (true) for (pattern.src, src_temps) |src_pattern, *src_temp| { + while (true) for (pattern.src[0..src_temps.len], src_temps) |src_pattern, *src_temp| { if (try src_pattern.convert(src_temp, cg)) break; } else break; @memcpy(src_slots[0..src_temps.len], src_temps); diff 
--git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 251cf7d7cd..142fe4745b 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -64,7 +64,7 @@ pub fn findByMnemonic( comptime var feature_it = std.mem.splitScalar(u8, @tagName(tag), ' '); comptime var features: []const std.Target.x86.Feature = &.{}; inline while (comptime feature_it.next()) |feature| features = features ++ .{@field(std.Target.x86.Feature, feature)}; - break :has_features std.Target.x86.featureSetHasAll(target.cpu.features, features[0..features.len].*); + break :has_features std.Target.x86.featureSetHasAll(target.cpu.features, features[0..].*); }, }) continue; @@ -250,7 +250,8 @@ pub const Mnemonic = enum { // General-purpose adc, add, @"and", bsf, bsr, bswap, bt, btc, btr, bts, - call, cbw, cdq, cdqe, clflush, + call, cbw, cdq, cdqe, + clac, clc, cld, clflush, cli, clts, clui, cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna, cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno, cmovnp, cmovns, cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz, @@ -274,7 +275,9 @@ pub const Mnemonic = enum { rcl, rcr, ret, rol, ror, rorx, sal, sar, sarx, sbb, scas, scasb, scasd, scasq, scasw, - shl, shld, shlx, shr, shrd, shrx, sub, syscall, + shl, shld, shlx, shr, shrd, shrx, + stac, stc, std, sti, stui, + sub, syscall, seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae, setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns, setnz, seto, setp, setpe, setpo, sets, setz, @@ -307,7 +310,7 @@ pub const Mnemonic = enum { ldmxcsr, maxps, maxss, minps, minss, - movaps, movhlps, movlhps, + movaps, movhlps, movhps, movlhps, movlps, movmskps, movss, movups, mulps, mulss, @@ -333,6 +336,7 @@ pub const Mnemonic = enum { minpd, minsd, movapd, movdqa, movdqu, + movhpd, movlpd, movmskpd, //movsd, movupd, @@ -395,7 +399,7 @@ pub const Mnemonic = enum { vmovd, vmovddup, vmovdqa, vmovdqu, - vmovhlps, vmovlhps, + vmovhlps, vmovhpd, vmovhps, vmovlhps, vmovlpd, vmovlps, vmovmskpd, vmovmskps, vmovq, vmovsd, @@ -823,6 +827,7 @@ pub const Feature = enum { avx2, bmi, bmi2, + cmov, f16c, fma, lzcnt, @@ -830,6 +835,7 @@ pub const Feature = enum { pclmul, @"pclmul avx", popcnt, + smap, sse, sse2, sse3, @@ -837,6 +843,7 @@ pub const Feature = enum { sse4_2, ssse3, sha, + uintr, vaes, vpclmulqdq, x87, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index e025f4ddbd..bfe699a825 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -418,8 +418,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) // Here, we currently assume local dynamic TLS vars, and so // we emit LD model. 
_ = lower.reloc(.{ .linker_tlsld = sym_index }, 0); - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .lea, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{ .{ .reg = .rdi }, .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }, }, lower.target); @@ -427,8 +426,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) _ = lower.reloc(.{ .linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null), }, 0); - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .call, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{ .{ .imm = .s(0) }, }, lower.target); lower.result_insts_len += 1; @@ -440,8 +438,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) }) }; } else { // Since we are linking statically, we emit LE model directly. - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = .rax }, .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) }, }, lower.target); @@ -464,8 +461,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) .mov => { if (elf_sym.flags.is_extern_ptr) { const reg = ops[0].reg; - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = reg.to64() }, .{ .mem = Memory.initRip(.qword, 0) }, }, lower.target); @@ -496,16 +492,14 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) if (macho_sym.flags.tlv) { _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = .rdi }, .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }, - }); + }, lower.target); lower.result_insts_len += 1; - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .call, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{ .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .rdi } }) }, - }); + }, lower.target); lower.result_insts_len += 1; emit_mnemonic = .mov; break :op .{ .reg = .rax }; @@ -520,11 +514,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) .mov => { if (macho_sym.flags.is_extern_ptr) { const reg = ops[0].reg; - lower.result_insts[lower.result_insts_len] = - try Instruction.new(.none, .mov, &[_]Operand{ + lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = reg.to64() }, .{ .mem = Memory.initRip(.qword, 0) }, - }); + }, lower.target); lower.result_insts_len += 1; break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ .base = .{ .reg = reg.to64(), @@ -541,8 +534,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) }, }; } - lower.result_insts[lower.result_insts_len] = - try Instruction.new(emit_prefix, emit_mnemonic, emit_ops, lower.target); + lower.result_insts[lower.result_insts_len] = try .new(emit_prefix, emit_mnemonic, emit_ops, lower.target); lower.result_insts_len += 1; } diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 19bacdcac6..c5f29d3a0c 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -38,6 +38,11 @@ pub const Inst = struct { 
/// ___ Right Without Affecting Flags _rx, + /// ___ Forward + _f, + /// ___ Reverse + //_r, + /// ___ Above _a, /// ___ Above Or Equal @@ -47,6 +52,7 @@ pub const Inst = struct { /// ___ Below Or Equal _be, /// ___ Carry + /// ___ Carry Flag _c, /// ___ Equal _e, @@ -98,6 +104,14 @@ pub const Inst = struct { _s, /// ___ Zero _z, + /// ___ Alignment Check Flag + _ac, + /// ___ Direction Flag + //_d, + /// ___ Interrupt Flag + _i, + /// ___ User Interrupt Flag + _ui, /// ___ Byte //_b, @@ -299,9 +313,8 @@ pub const Inst = struct { /// Bitwise logical and of packed double-precision floating-point values @"and", /// Bit scan forward - bsf, /// Bit scan reverse - bsr, + bs, /// Byte swap bswap, /// Bit test @@ -317,6 +330,10 @@ pub const Inst = struct { cdq, /// Convert doubleword to quadword cdqe, + /// Clear carry flag + /// Clear direction flag + /// Clear interrupt flag + cl, /// Flush cache line clflush, /// Conditional move @@ -443,6 +460,11 @@ pub const Inst = struct { /// Subtract packed double-precision floating-point values /// Subtract scalar double-precision floating-point values sub, + /// Set carry flag + /// Set direction flag + /// Set interrupt flag + /// Store floating-point value + st, /// Store string sto, /// Syscall @@ -478,8 +500,6 @@ pub const Inst = struct { ldenv, /// Store x87 FPU environment nstenv, - /// Store floating-point value - st, /// Store x87 FPU environment stenv, @@ -560,8 +580,14 @@ pub const Inst = struct { /// Move aligned packed single-precision floating-point values /// Move aligned packed double-precision floating-point values mova, + /// Move high packed single-precision floating-point values + /// Move high packed double-precision floating-point values + movh, /// Move packed single-precision floating-point values high to low movhl, + /// Move low packed single-precision floating-point values + /// Move low packed double-precision floating-point values + movl, /// Move packed single-precision floating-point values low to high movlh, /// Move unaligned packed single-precision floating-point values diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 6d1ab76c5a..500dc488e6 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -571,11 +571,15 @@ pub const Memory = struct { writer: anytype, ) @TypeOf(writer).Error!void { if (s == .none) return; - if (s != .ptr) { - try writer.writeAll(@tagName(s)); - try writer.writeByte(' '); + try writer.writeAll(@tagName(s)); + switch (s) { + .none => unreachable, + .ptr => {}, + else => { + try writer.writeByte(' '); + try writer.writeAll("ptr"); + }, } - try writer.writeAll("ptr"); } }; diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index a3a82cf4e2..f6f86cd828 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -132,98 +132,110 @@ pub const table = [_]Entry{ .{ .cdq, .zo, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, .{ .cqo, .zo, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, + .{ .clac, .zo, &.{}, &.{ 0x0f, 0x01, 0xca }, 0, .none, .smap }, + + .{ .clc, .zo, &.{}, &.{ 0xf8 }, 0, .none, .none }, + + .{ .cld, .zo, &.{}, &.{ 0xfc }, 0, .none, .none }, + .{ .clflush, .m, &.{ .m8 }, &.{ 0x0f, 0xae }, 7, .none, .none }, - .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, - .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, - .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, - .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, - .{ .cmovae, .rm, 
&.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, - .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, - .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, - .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, - .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, - .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, - .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, - .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, - .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, - .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, - .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, - .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, - .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, - .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, - .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, - .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, - .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, - .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, - .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, - .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, - .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, - .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, - .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, - .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, - .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, - .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, - .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, - .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, - .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, - .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, - .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, - .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, - .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, - .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, - .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, - .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, - .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, - .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, - .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, - .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 
0x0f, 0x4c }, 0, .short, .none }, - .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, - .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, - .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, - .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, - .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, - .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, - .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, - .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, - .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .none }, - .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, - .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .none }, - .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, - .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, - .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, - .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .none }, - .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, - .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .none }, - .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, - .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, - .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, - .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .none }, - .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, - .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .none }, - .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, - .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, - .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, - .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, - .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, - .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, - .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, - .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, - .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, - .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .none }, - .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, - .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .none }, - .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, - .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, - .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + .{ .cli, .zo, &.{}, &.{ 0xfa }, 0, .none, .none }, + + .{ .clts, .zo, &.{}, &.{ 0x0f, 0x06 }, 0, .none, .none }, + + .{ .clui, .zo, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xee }, 0, .none, .uintr }, + + .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov }, + .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .cmov }, + .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .cmov }, + .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov }, + .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov }, + .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov }, + .{ .cmovb, .rm, &.{ 
.r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov }, + .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov }, + .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov }, + .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .cmov }, + .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .cmov }, + .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .cmov }, + .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov }, + .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov }, + .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov }, + .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .cmov }, + .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .cmov }, + .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .cmov }, + .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .cmov }, + .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .cmov }, + .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .cmov }, + .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .cmov }, + .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .cmov }, + .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .cmov }, + .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .cmov }, + .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .cmov }, + .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .cmov }, + .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .cmov }, + .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .cmov }, + .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .cmov }, + .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .cmov }, + .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .cmov }, + .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .cmov }, + .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .cmov }, + .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .cmov }, + .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .cmov }, + .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov }, + .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov }, + .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov }, + .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov }, + .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .cmov }, + .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .cmov }, + .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .cmov }, + .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .cmov }, + .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .cmov }, + .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .cmov }, + .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .cmov }, + .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .cmov }, + .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .cmov }, + .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .cmov }, + .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .cmov }, + .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .cmov }, + .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .cmov }, + .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 
0x0f, 0x4c }, 0, .long, .cmov }, + .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .cmov }, + .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .cmov }, + .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .cmov }, + .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .cmov }, + .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .cmov }, + .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .cmov }, + .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .cmov }, + .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .cmov }, + .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .cmov }, + .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .cmov }, + .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .cmov }, + .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .cmov }, + .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .cmov }, + .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .cmov }, + .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .cmov }, + .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .cmov }, + .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .cmov }, + .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .cmov }, + .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .cmov }, + .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .cmov }, + .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .cmov }, + .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .cmov }, + .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .cmov }, + .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .cmov }, + .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .cmov }, + .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .cmov }, + .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .cmov }, + .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .cmov }, + .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .cmov }, + .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .cmov }, + .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .cmov }, + .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .cmov }, + .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .cmov }, + .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .cmov }, + .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .cmov }, + .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .cmov }, .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none, .none }, .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .short, .none }, @@ -747,6 +759,16 @@ pub const table = [_]Entry{ .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none }, + .{ .stac, .zo, &.{}, &.{ 0x0f, 0x01, 0xcb }, 0, .none, .smap }, + + .{ .stc, .zo, &.{}, &.{ 0xf9 }, 0, .none, .none }, + + .{ .std, .zo, &.{}, &.{ 0xfd }, 0, .none, .none }, + + .{ .sti, .zo, &.{}, &.{ 0xfb }, 0, .none, .none }, + + .{ .stui, .zo, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xef }, 0, .none, .uintr }, + .{ .stos, .zo, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, .{ .stos, .zo, &.{ .m16 }, &.{ 0xab }, 0, .short, .none }, .{ .stos, .zo, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, @@ 
-927,8 +949,14 @@ pub const table = [_]Entry{ .{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse }, + .{ .movhps, .rm, &.{ .xmm, .m64 }, &.{ 0x0f, 0x16 }, 0, .none, .sse }, + .{ .movhps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x17 }, 0, .none, .sse }, + .{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse }, + .{ .movlps, .rm, &.{ .xmm, .m64 }, &.{ 0x0f, 0x12 }, 0, .none, .sse }, + .{ .movlps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x13 }, 0, .none, .sse }, + .{ .movmskps, .rm, &.{ .r32, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse }, .{ .movmskps, .rm, &.{ .r64, .xmm }, &.{ 0x0f, 0x50 }, 0, .none, .sse }, @@ -1037,6 +1065,12 @@ pub const table = [_]Entry{ .{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 }, .{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 }, + .{ .movhpd, .rm, &.{ .xmm, .m64 }, &.{ 0x66, 0x0f, 0x16 }, 0, .none, .sse2 }, + .{ .movhpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x17 }, 0, .none, .sse2 }, + + .{ .movlpd, .rm, &.{ .xmm, .m64 }, &.{ 0x66, 0x0f, 0x12 }, 0, .none, .sse2 }, + .{ .movlpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x13 }, 0, .none, .sse2 }, + .{ .movmskpd, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 }, .{ .movmskpd, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0x50 }, 0, .none, .sse2 }, @@ -1486,8 +1520,20 @@ pub const table = [_]Entry{ .{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovhpd, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x66, 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovhpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x17 }, 0, .vex_128_wig, .avx }, + + .{ .vmovhps, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovhps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x17 }, 0, .vex_128_wig, .avx }, + .{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovlpd, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x66, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovlpd, .mr, &.{ .m64, .xmm }, &.{ 0x66, 0x0f, 0x13 }, 0, .vex_128_wig, .avx }, + + .{ .vmovlps, .rvm, &.{ .xmm, .xmm, .m64 }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovlps, .mr, &.{ .m64, .xmm }, &.{ 0x0f, 0x13 }, 0, .vex_128_wig, .avx }, + .{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx }, .{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx }, @@ -1583,14 +1629,14 @@ pub const table = [_]Entry{ .{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx }, .{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx }, - .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx }, - .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx }, + .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_w0, .avx }, + .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_w0, .avx }, - .{ .vpinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx }, - .{ .vpinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx }, - .{ .vpinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx }, + .{ .vpinsrb, .rvmi, &.{ .xmm, .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrd, .rvmi, &.{ .xmm, .xmm, .rm32, .imm8 
}, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrq, .rvmi, &.{ .xmm, .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx }, - .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, + .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_w0, .avx }, .{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx }, .{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx }, diff --git a/src/dev.zig b/src/dev.zig index 2573e63f25..f4be5a36a9 100644 --- a/src/dev.zig +++ b/src/dev.zig @@ -135,6 +135,7 @@ pub const Env = enum { else => Env.ast_gen.supports(feature), }, .@"x86_64-linux" => switch (feature) { + .build_command, .stdio_listen, .incremental, .x86_64_backend, diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index 68cb154d3b..10e6f669d3 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -1274,19 +1274,19 @@ const x86_64 = struct { fn relaxGotpcrelx(code: []u8, t: *const std.Target) !void { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return error.RelaxFailure; - const inst = switch (old_inst.encoding.mnemonic) { - .call => try Instruction.new(old_inst.prefix, .call, &.{ + const inst: Instruction = switch (old_inst.encoding.mnemonic) { + .call => try .new(old_inst.prefix, .call, &.{ // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, + .{ .imm = .s(-129) }, }, t), - .jmp => try Instruction.new(old_inst.prefix, .jmp, &.{ + .jmp => try .new(old_inst.prefix, .jmp, &.{ // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, + .{ .imm = .s(-129) }, }, t), else => return error.RelaxFailure, }; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); - const nop = try Instruction.new(.none, .nop, &.{}, t); + const nop: Instruction = try .new(.none, .nop, &.{}, t); try encode(&.{ nop, inst }, code); } @@ -1295,7 +1295,7 @@ const x86_64 = struct { const old_inst = disassemble(code) orelse return error.RelaxFailure; switch (old_inst.encoding.mnemonic) { .mov => { - const inst = try Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t); + const inst: Instruction = try .new(old_inst.prefix, .lea, &old_inst.ops, t); relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); try encode(&.{inst}, code); }, @@ -1404,14 +1404,15 @@ const x86_64 = struct { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return false; switch (old_inst.encoding.mnemonic) { - .mov => if (Instruction.new(old_inst.prefix, .mov, &.{ - old_inst.ops[0], - // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, - }, t)) |inst| { + .mov => { + const inst = Instruction.new(old_inst.prefix, .mov, &.{ + old_inst.ops[0], + // TODO: hack to force imm32s in the assembler + .{ .imm = .s(-129) }, + }, t) catch return false; inst.encode(std.io.null_writer, .{}) catch return false; return true; - } else |_| return false, + }, else => return false, } } @@ -1424,7 +1425,7 @@ const x86_64 = struct { const inst = Instruction.new(old_inst.prefix, .mov, &.{ old_inst.ops[0], // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, + .{ .imm = .s(-129) }, }, t) catch unreachable; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); encode(&.{inst}, code) catch unreachable; @@ -1438,10 +1439,10 @@ const x86_64 = 
struct { const old_inst = disassemble(code) orelse return error.RelaxFailure; switch (old_inst.encoding.mnemonic) { .lea => { - const inst = try Instruction.new(old_inst.prefix, .mov, &.{ + const inst: Instruction = try .new(old_inst.prefix, .mov, &.{ old_inst.ops[0], // TODO: hack to force imm32s in the assembler - .{ .imm = Immediate.s(-129) }, + .{ .imm = .s(-129) }, }, target); relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); try encode(&.{inst}, code); @@ -1781,7 +1782,7 @@ const aarch64 = struct { const off: u12 = @truncate(@as(u64, @bitCast(S_ + A))); aarch64_util.writeAddImmInst(off, code); } else { - const old_inst = Instruction{ + const old_inst: Instruction = .{ .add_subtract_immediate = mem.bytesToValue(std.meta.TagPayload( Instruction, Instruction.add_subtract_immediate, @@ -1795,7 +1796,7 @@ const aarch64 = struct { }, .TLSDESC_CALL => if (!target.flags.has_tlsdesc) { - const old_inst = Instruction{ + const old_inst: Instruction = .{ .unconditional_branch_register = mem.bytesToValue(std.meta.TagPayload( Instruction, Instruction.unconditional_branch_register, diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ed554ffb35..4270ff0306 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -640,7 +640,8 @@ fn resolveRelocInner( macho_file: *MachO, writer: anytype, ) ResolveError!void { - const cpu_arch = macho_file.getTarget().cpu.arch; + const t = &macho_file.base.comp.root_mod.resolved_target.result; + const cpu_arch = t.cpu.arch; const rel_offset = math.cast(usize, rel.offset - self.off) orelse return error.Overflow; const P = @as(i64, @intCast(self.getAddress(macho_file))) + @as(i64, @intCast(rel_offset)); const A = rel.addend + rel.getRelocAddend(cpu_arch); @@ -747,7 +748,7 @@ fn resolveRelocInner( const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file)); try writer.writeInt(i32, @intCast(S_ + A - P), .little); } else { - try x86_64.relaxTlv(code[rel_offset - 3 ..]); + try x86_64.relaxTlv(code[rel_offset - 3 ..], t); try writer.writeInt(i32, @intCast(S + A - P), .little); } }, @@ -893,11 +894,12 @@ fn resolveRelocInner( const x86_64 = struct { fn relaxGotLoad(self: Atom, code: []u8, rel: Relocation, macho_file: *MachO) ResolveError!void { dev.check(.x86_64_backend); + const t = &macho_file.base.comp.root_mod.resolved_target.result; const diags = &macho_file.base.comp.link_diags; const old_inst = disassemble(code) orelse return error.RelaxFail; switch (old_inst.encoding.mnemonic) { .mov => { - const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t) catch return error.RelaxFail; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); encode(&.{inst}, code) catch return error.RelaxFail; }, @@ -916,12 +918,12 @@ const x86_64 = struct { } } - fn relaxTlv(code: []u8) error{RelaxFail}!void { + fn relaxTlv(code: []u8, t: *const std.Target) error{RelaxFail}!void { dev.check(.x86_64_backend); const old_inst = disassemble(code) orelse return error.RelaxFail; switch (old_inst.encoding.mnemonic) { .mov => { - const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail; + const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops, t) catch return error.RelaxFail; relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); encode(&.{inst}, code) catch return error.RelaxFail; }, diff --git a/test/behavior/math.zig 
b/test/behavior/math.zig index 21f09a877f..789eeaef66 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -65,6 +65,8 @@ test "@clz" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try testClz(); try comptime testClz(); @@ -75,6 +77,7 @@ fn testClz() !void { try expect(testOneClz(u8, 0b00001010) == 4); try expect(testOneClz(u8, 0b00011010) == 3); try expect(testOneClz(u8, 0b00000000) == 8); + try expect(testOneClz(i8, -1) == 0); } test "@clz big ints" { @@ -100,7 +103,7 @@ fn testOneClz(comptime T: type, x: T) u32 { test "@clz vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -159,6 +162,8 @@ fn testCtz() !void { try expect(testOneCtz(u8, 0b10100000) == 5); try expect(testOneCtz(u8, 0b10001010) == 1); try expect(testOneCtz(u8, 0b00000000) == 8); + try expect(testOneCtz(i8, -1) == 0); + try expect(testOneCtz(i8, -2) == 1); try expect(testOneCtz(u16, 0b00000000) == 16); } @@ -1712,7 +1717,7 @@ test "mod lazy values" { test "@clz works on both vector and scalar inputs" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 3180d1554a..6b03ac90e3 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -646,7 +646,7 @@ test "vector division operators" { test "vector bitwise not operator" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/x86_64.zig b/test/behavior/x86_64.zig index ffb1750ff6..f72fa79ca5 100644 --- a/test/behavior/x86_64.zig +++ b/test/behavior/x86_64.zig @@ -1,8 +1,7 @@ //! CodeGen tests for the x86_64 backend. 
-const builtin = @import("builtin"); - test { + const builtin = @import("builtin"); if (builtin.zig_backend != .stage2_x86_64) return error.SkipZigTest; if (builtin.object_format == .coff) return error.SkipZigTest; _ = @import("x86_64/math.zig"); diff --git a/test/behavior/x86_64/build.zig b/test/behavior/x86_64/build.zig new file mode 100644 index 0000000000..dccda7236b --- /dev/null +++ b/test/behavior/x86_64/build.zig @@ -0,0 +1,114 @@ +const std = @import("std"); +pub fn build(b: *std.Build) void { + const compiler_rt_lib = b.addStaticLibrary(.{ + .name = "compiler_rt", + .use_llvm = false, + .use_lld = false, + .root_module = b.createModule(.{ + .root_source_file = b.addWriteFiles().add("compiler_rt.zig", ""), + .target = b.resolveTargetQuery(.{ .cpu_arch = .x86_64 }), + }), + }); + compiler_rt_lib.bundle_compiler_rt = true; + + for ([_]std.Target.Query{ + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.bsf_bsr_0_clobbers_result}), + //.cpu_features_sub = std.Target.x86.featureSet(&.{.sse}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.bsf_bsr_0_clobbers_result}), + .cpu_features_sub = std.Target.x86.featureSet(&.{ + .cmov, + //.sse, + }), + }, + //.{ + // .cpu_arch = .x86_64, + // .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + // .cpu_features_sub = std.Target.x86.featureSet(&.{.sse}), + //}, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_sub = std.Target.x86.featureSet(&.{.sse2}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.sse3}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.ssse3}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.sse4_1}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.sse4_2}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v2 }, + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v2 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.avx}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, + .cpu_features_sub = std.Target.x86.featureSet(&.{.avx2}), + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, + }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 }, + }, + }) |query| { + const target = b.resolveTargetQuery(query); + const cpu = query.serializeCpuAlloc(b.allocator) catch @panic("OOM"); + for ([_][]const u8{ + "math.zig", + }) |path| { + const test_mod = b.createModule(.{ + .root_source_file = b.path(path), + .target = target, + }); + const test_exe = b.addTest(.{ + .name = std.fs.path.stem(path), + .use_llvm = false, + .use_lld = false, + .root_module = test_mod, + }); + if (!std.Target.x86.featureSetHas(target.result.cpu.features, .sse2)) { + test_exe.bundle_compiler_rt = false; + 
test_mod.linkLibrary(compiler_rt_lib); + } + const test_run = b.addRunArtifact(test_exe); + b.default_step.dependOn(&test_run.step); + for ([_]*std.Build.Step{ + &test_exe.step, + &test_run.step, + }) |step| step.name = b.fmt("{s} {s}", .{ step.name, cpu }); + } + } +} diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index 7860c945a0..5bb257a5f4 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -1,3 +1,709 @@ +fn testUnary(comptime op: anytype) !void { + const testType = struct { + fn testType(comptime Type: type, comptime imm_arg: Type) !void { + const expected = op(Type, imm_arg); + try struct { + fn testOne(actual: @TypeOf(expected)) !void { + if (switch (@typeInfo(@TypeOf(expected))) { + else => actual != expected, + .vector => @reduce(.Or, actual != expected), + }) return error.Unexpected; + } + noinline fn testOps(mem_arg: Type) !void { + var reg_arg = mem_arg; + _ = .{®_arg}; + try testOne(op(Type, reg_arg)); + try testOne(op(Type, mem_arg)); + try testOne(op(Type, imm_arg)); + } + }.testOps(imm_arg); + } + }.testType; + + try testType(i0, 0); + try testType(u0, 0); + + try testType(i1, -1); + try testType(i1, 0); + try testType(u1, 0); + try testType(u1, 1 << 0); + + try testType(i2, -1 << 1); + try testType(i2, -1); + try testType(i2, 0); + try testType(u2, 0); + try testType(u2, 1 << 0); + try testType(u2, 1 << 1); + + try testType(i3, -1 << 2); + try testType(i3, -1); + try testType(i3, 0); + try testType(u3, 0); + try testType(u3, 1 << 0); + try testType(u3, 1 << 1); + try testType(u3, 1 << 2); + + try testType(i4, -1 << 3); + try testType(i4, -1); + try testType(i4, 0); + try testType(u4, 0); + try testType(u4, 1 << 0); + try testType(u4, 1 << 1); + try testType(u4, 1 << 2); + try testType(u4, 1 << 3); + + try testType(i5, -1 << 4); + try testType(i5, -1); + try testType(i5, 0); + try testType(u5, 0); + try testType(u5, 1 << 0); + try testType(u5, 1 << 1); + try testType(u5, 1 << 3); + try testType(u5, 1 << 4); + + try testType(i7, -1 << 6); + try testType(i7, -1); + try testType(i7, 0); + try testType(u7, 0); + try testType(u7, 1 << 0); + try testType(u7, 1 << 1); + try testType(u7, 1 << 5); + try testType(u7, 1 << 6); + + try testType(i8, -1 << 7); + try testType(i8, -1); + try testType(i8, 0); + try testType(u8, 0); + try testType(u8, 1 << 0); + try testType(u8, 1 << 1); + try testType(u8, 1 << 6); + try testType(u8, 1 << 7); + + try testType(i9, -1 << 8); + try testType(i9, -1); + try testType(i9, 0); + try testType(u9, 0); + try testType(u9, 1 << 0); + try testType(u9, 1 << 1); + try testType(u9, 1 << 7); + try testType(u9, 1 << 8); + + try testType(i15, -1 << 14); + try testType(i15, -1); + try testType(i15, 0); + try testType(u15, 0); + try testType(u15, 1 << 0); + try testType(u15, 1 << 1); + try testType(u15, 1 << 13); + try testType(u15, 1 << 14); + + try testType(i16, -1 << 15); + try testType(i16, -1); + try testType(i16, 0); + try testType(u16, 0); + try testType(u16, 1 << 0); + try testType(u16, 1 << 1); + try testType(u16, 1 << 14); + try testType(u16, 1 << 15); + + try testType(i17, -1 << 16); + try testType(i17, -1); + try testType(i17, 0); + try testType(u17, 0); + try testType(u17, 1 << 0); + try testType(u17, 1 << 1); + try testType(u17, 1 << 15); + try testType(u17, 1 << 16); + + try testType(i31, -1 << 30); + try testType(i31, -1); + try testType(i31, 0); + try testType(u31, 0); + try testType(u31, 1 << 0); + try testType(u31, 1 << 1); + try testType(u31, 1 << 29); + try testType(u31, 1 << 30); + + 
try testType(i32, -1 << 31); + try testType(i32, -1); + try testType(i32, 0); + try testType(u32, 0); + try testType(u32, 1 << 0); + try testType(u32, 1 << 1); + try testType(u32, 1 << 30); + try testType(u32, 1 << 31); + + try testType(i33, -1 << 32); + try testType(i33, -1); + try testType(i33, 0); + try testType(u33, 0); + try testType(u33, 1 << 0); + try testType(u33, 1 << 1); + try testType(u33, 1 << 31); + try testType(u33, 1 << 32); + + try testType(i63, -1 << 62); + try testType(i63, -1); + try testType(i63, 0); + try testType(u63, 0); + try testType(u63, 1 << 0); + try testType(u63, 1 << 1); + try testType(u63, 1 << 61); + try testType(u63, 1 << 62); + + try testType(i64, -1 << 63); + try testType(i64, -1); + try testType(i64, 0); + try testType(u64, 0); + try testType(u64, 1 << 0); + try testType(u64, 1 << 1); + try testType(u64, 1 << 62); + try testType(u64, 1 << 63); + + try testType(i65, -1 << 64); + try testType(i65, -1); + try testType(i65, 0); + try testType(u65, 0); + try testType(u65, 1 << 0); + try testType(u65, 1 << 1); + try testType(u65, 1 << 63); + try testType(u65, 1 << 64); + + try testType(i95, -1 << 94); + try testType(i95, -1); + try testType(i95, 0); + try testType(u95, 0); + try testType(u95, 1 << 0); + try testType(u95, 1 << 1); + try testType(u95, 1 << 93); + try testType(u95, 1 << 94); + + try testType(i96, -1 << 95); + try testType(i96, -1); + try testType(i96, 0); + try testType(u96, 0); + try testType(u96, 1 << 0); + try testType(u96, 1 << 1); + try testType(u96, 1 << 94); + try testType(u96, 1 << 95); + + try testType(i97, -1 << 96); + try testType(i97, -1); + try testType(i97, 0); + try testType(u97, 0); + try testType(u97, 1 << 0); + try testType(u97, 1 << 1); + try testType(u97, 1 << 95); + try testType(u97, 1 << 96); + + try testType(i127, -1 << 126); + try testType(i127, -1); + try testType(i127, 0); + try testType(u127, 0); + try testType(u127, 1 << 0); + try testType(u127, 1 << 1); + try testType(u127, 1 << 125); + try testType(u127, 1 << 126); + + try testType(i128, -1 << 127); + try testType(i128, -1); + try testType(i128, 0); + try testType(u128, 0); + try testType(u128, 1 << 0); + try testType(u128, 1 << 1); + try testType(u128, 1 << 126); + try testType(u128, 1 << 127); + + try testType(i129, -1 << 128); + try testType(i129, -1); + try testType(i129, 0); + try testType(u129, 0); + try testType(u129, 1 << 0); + try testType(u129, 1 << 1); + try testType(u129, 1 << 127); + try testType(u129, 1 << 128); + + try testType(i159, -1 << 158); + try testType(i159, -1); + try testType(i159, 0); + try testType(u159, 0); + try testType(u159, 1 << 0); + try testType(u159, 1 << 1); + try testType(u159, 1 << 157); + try testType(u159, 1 << 158); + + try testType(i160, -1 << 159); + try testType(i160, -1); + try testType(i160, 0); + try testType(u160, 0); + try testType(u160, 1 << 0); + try testType(u160, 1 << 1); + try testType(u160, 1 << 158); + try testType(u160, 1 << 159); + + try testType(i161, -1 << 160); + try testType(i161, -1); + try testType(i161, 0); + try testType(u161, 0); + try testType(u161, 1 << 0); + try testType(u161, 1 << 1); + try testType(u161, 1 << 159); + try testType(u161, 1 << 160); + + try testType(i191, -1 << 190); + try testType(i191, -1); + try testType(i191, 0); + try testType(u191, 0); + try testType(u191, 1 << 0); + try testType(u191, 1 << 1); + try testType(u191, 1 << 189); + try testType(u191, 1 << 190); + + try testType(i192, -1 << 191); + try testType(i192, -1); + try testType(i192, 0); + try testType(u192, 0); + try 
testType(u192, 1 << 0); + try testType(u192, 1 << 1); + try testType(u192, 1 << 190); + try testType(u192, 1 << 191); + + try testType(i193, -1 << 192); + try testType(i193, -1); + try testType(i193, 0); + try testType(u193, 0); + try testType(u193, 1 << 0); + try testType(u193, 1 << 1); + try testType(u193, 1 << 191); + try testType(u193, 1 << 192); + + try testType(i223, -1 << 222); + try testType(i223, -1); + try testType(i223, 0); + try testType(u223, 0); + try testType(u223, 1 << 0); + try testType(u223, 1 << 1); + try testType(u223, 1 << 221); + try testType(u223, 1 << 222); + + try testType(i224, -1 << 223); + try testType(i224, -1); + try testType(i224, 0); + try testType(u224, 0); + try testType(u224, 1 << 0); + try testType(u224, 1 << 1); + try testType(u224, 1 << 222); + try testType(u224, 1 << 223); + + try testType(i225, -1 << 224); + try testType(i225, -1); + try testType(i225, 0); + try testType(u225, 0); + try testType(u225, 1 << 0); + try testType(u225, 1 << 1); + try testType(u225, 1 << 223); + try testType(u225, 1 << 224); + + try testType(i255, -1 << 254); + try testType(i255, -1); + try testType(i255, 0); + try testType(u255, 0); + try testType(u255, 1 << 0); + try testType(u255, 1 << 1); + try testType(u255, 1 << 253); + try testType(u255, 1 << 254); + + try testType(i256, -1 << 255); + try testType(i256, -1); + try testType(i256, 0); + try testType(u256, 0); + try testType(u256, 1 << 0); + try testType(u256, 1 << 1); + try testType(u256, 1 << 254); + try testType(u256, 1 << 255); + + try testType(i257, -1 << 256); + try testType(i257, -1); + try testType(i257, 0); + try testType(u257, 0); + try testType(u257, 1 << 0); + try testType(u257, 1 << 1); + try testType(u257, 1 << 255); + try testType(u257, 1 << 256); + + try testType(i511, -1 << 510); + try testType(i511, -1); + try testType(i511, 0); + try testType(u511, 0); + try testType(u511, 1 << 0); + try testType(u511, 1 << 1); + try testType(u511, 1 << 509); + try testType(u511, 1 << 510); + + try testType(i512, -1 << 511); + try testType(i512, -1); + try testType(i512, 0); + try testType(u512, 0); + try testType(u512, 1 << 0); + try testType(u512, 1 << 1); + try testType(u512, 1 << 510); + try testType(u512, 1 << 511); + + try testType(i513, -1 << 512); + try testType(i513, -1); + try testType(i513, 0); + try testType(u513, 0); + try testType(u513, 1 << 0); + try testType(u513, 1 << 1); + try testType(u513, 1 << 511); + try testType(u513, 1 << 512); + + try testType(i1023, -1 << 1022); + try testType(i1023, -1); + try testType(i1023, 0); + try testType(u1023, 0); + try testType(u1023, 1 << 0); + try testType(u1023, 1 << 1); + try testType(u1023, 1 << 1021); + try testType(u1023, 1 << 1022); + + try testType(i1024, -1 << 1023); + try testType(i1024, -1); + try testType(i1024, 0); + try testType(u1024, 0); + try testType(u1024, 1 << 0); + try testType(u1024, 1 << 1); + try testType(u1024, 1 << 1022); + try testType(u1024, 1 << 1023); + + try testType(i1025, -1 << 1024); + try testType(i1025, -1); + try testType(i1025, 0); + try testType(u1025, 0); + try testType(u1025, 1 << 0); + try testType(u1025, 1 << 1); + try testType(u1025, 1 << 1023); + try testType(u1025, 1 << 1024); + + try testType(@Vector(3, i0), .{ 0 << 0, 0, 0 }); + try testType(@Vector(3, u0), .{ 0, 0, 0 << 0 }); + + try testType(@Vector(3, i1), .{ -1 << 0, -1, 0 }); + try testType(@Vector(3, u1), .{ 0, 1, 1 << 0 }); + + try testType(@Vector(3, i2), .{ -1 << 1, -1, 0 }); + try testType(@Vector(3, u2), .{ 0, 1, 1 << 1 }); + + try testType(@Vector(3, i3), 
.{ -1 << 2, -1, 0 }); + try testType(@Vector(3, u3), .{ 0, 1, 1 << 2 }); + + try testType(@Vector(3, i4), .{ -1 << 3, -1, 0 }); + try testType(@Vector(3, u4), .{ 0, 1, 1 << 3 }); + try testType(@Vector(1, u4), .{ + 0xb, + }); + try testType(@Vector(2, u4), .{ + 0x3, 0x4, + }); + try testType(@Vector(4, u4), .{ + 0x9, 0x2, 0xf, 0xe, + }); + try testType(@Vector(8, u4), .{ + 0x8, 0x1, 0xb, 0x1, 0xf, 0x5, 0x9, 0x6, + }); + try testType(@Vector(16, u4), .{ + 0xb, 0x6, 0x0, 0x7, 0x8, 0x5, 0x6, 0x9, 0xe, 0xb, 0x3, 0xa, 0xb, 0x5, 0x8, 0xc, + }); + try testType(@Vector(32, u4), .{ + 0xe, 0x6, 0xe, 0xa, 0xb, 0x4, 0xa, 0xb, 0x1, 0x3, 0xb, 0xc, 0x0, 0xb, 0x9, 0x4, 0xd, 0xa, 0xd, 0xd, 0x4, 0x8, 0x8, 0x6, 0xb, 0xe, 0x9, 0x6, 0xc, 0xd, 0x5, 0xd, + }); + try testType(@Vector(64, u4), .{ + 0x1, 0xc, 0xe, 0x9, 0x9, 0xf, 0x3, 0xf, 0x9, 0x9, 0x5, 0x3, 0xb, 0xd, 0xd, 0xf, 0x1, 0x2, 0xf, 0x9, 0x4, 0x4, 0x8, 0x9, 0x2, 0x9, 0x8, 0xe, 0x8, 0xa, 0x4, 0x3, + 0x4, 0xc, 0xb, 0x6, 0x4, 0x0, 0xa, 0x5, 0x1, 0xa, 0x4, 0xe, 0xa, 0x7, 0xd, 0x0, 0x4, 0xe, 0xe, 0x7, 0x7, 0xa, 0x4, 0x5, 0x6, 0xc, 0x6, 0x2, 0x6, 0xa, 0xe, 0xa, + }); + try testType(@Vector(128, u4), .{ + 0xd, 0x5, 0x6, 0xe, 0x3, 0x3, 0x3, 0xe, 0xd, 0xd, 0x9, 0x0, 0x0, 0xe, 0xa, 0x9, 0x8, 0x7, 0xb, 0x5, 0x7, 0xf, 0xb, 0x8, 0x0, 0xf, 0xb, 0x3, 0xa, 0x2, 0xb, 0xc, + 0x1, 0x1, 0xc, 0x8, 0x8, 0x6, 0x9, 0x1, 0xb, 0x0, 0x2, 0xb, 0x2, 0x2, 0x7, 0x6, 0x1, 0x1, 0xb, 0x4, 0x6, 0x4, 0x7, 0xc, 0xd, 0xc, 0xa, 0x8, 0x1, 0x7, 0x8, 0xa, + 0x9, 0xa, 0x1, 0x8, 0x1, 0x7, 0x9, 0x4, 0x5, 0x9, 0xd, 0x0, 0xa, 0xf, 0x3, 0x3, 0x9, 0x2, 0xf, 0x5, 0xb, 0x8, 0x6, 0xb, 0xf, 0x5, 0x8, 0x3, 0x9, 0xf, 0x6, 0x8, + 0xc, 0x8, 0x3, 0x4, 0xa, 0xe, 0xc, 0x1, 0xe, 0x9, 0x1, 0x8, 0xf, 0x6, 0xc, 0xc, 0x6, 0xf, 0x6, 0xd, 0xb, 0x9, 0xc, 0x3, 0xd, 0xa, 0x6, 0x8, 0x4, 0xa, 0x6, 0x9, + }); + try testType(@Vector(256, u4), .{ + 0x6, 0xc, 0xe, 0x3, 0x8, 0x2, 0xb, 0xd, 0x3, 0xa, 0x3, 0x8, 0xb, 0x8, 0x3, 0x0, 0xb, 0x5, 0x1, 0x3, 0x2, 0x2, 0xf, 0xc, 0x5, 0x1, 0x3, 0xb, 0x1, 0xc, 0x2, 0xd, + 0xa, 0x8, 0x1, 0xc, 0xb, 0xa, 0x3, 0x1, 0xe, 0x4, 0xf, 0xb, 0xd, 0x8, 0xf, 0xa, 0xc, 0xb, 0xb, 0x0, 0xa, 0xc, 0xf, 0xe, 0x8, 0xd, 0x9, 0x3, 0xa, 0xe, 0x8, 0x7, + 0x5, 0xa, 0x0, 0xe, 0x0, 0xd, 0x2, 0x2, 0x9, 0x4, 0x8, 0x9, 0x0, 0x4, 0x4, 0x8, 0xe, 0x1, 0xf, 0x1, 0x9, 0x3, 0xf, 0xc, 0xa, 0x0, 0x3, 0x2, 0x4, 0x1, 0x2, 0x3, + 0xf, 0x2, 0x7, 0xb, 0x5, 0x0, 0xd, 0x3, 0x4, 0xf, 0xa, 0x3, 0xc, 0x2, 0x5, 0xe, 0x7, 0x5, 0xd, 0x7, 0x9, 0x0, 0xd, 0x7, 0x9, 0xd, 0x5, 0x7, 0xf, 0xd, 0xb, 0x4, + 0x9, 0x6, 0xf, 0xb, 0x1, 0xb, 0x6, 0xb, 0xf, 0x7, 0xf, 0x0, 0x4, 0x7, 0x5, 0xa, 0x8, 0x1, 0xf, 0x9, 0x9, 0x0, 0x6, 0xb, 0x1, 0x2, 0x4, 0x3, 0x2, 0x0, 0x7, 0x0, + 0x6, 0x7, 0xf, 0x1, 0xe, 0xa, 0x8, 0x2, 0x9, 0xc, 0x1, 0x5, 0x7, 0x1, 0xb, 0x0, 0x1, 0x3, 0xd, 0x3, 0x0, 0x1, 0xa, 0x0, 0x3, 0x7, 0x1, 0x2, 0xb, 0xc, 0x2, 0x9, + 0x8, 0x8, 0x7, 0x0, 0xd, 0x5, 0x1, 0x5, 0x7, 0x7, 0x2, 0x3, 0x8, 0x7, 0xc, 0x8, 0xf, 0xa, 0xf, 0xf, 0x3, 0x2, 0x0, 0x4, 0x7, 0x5, 0x6, 0xd, 0x6, 0x3, 0xa, 0x4, + 0x1, 0x1, 0x2, 0xc, 0x3, 0xe, 0x2, 0xc, 0x7, 0x6, 0xe, 0xf, 0xb, 0x8, 0x6, 0x6, 0x9, 0x0, 0x4, 0xb, 0xe, 0x4, 0x2, 0x7, 0xf, 0xc, 0x0, 0x6, 0xd, 0xa, 0xe, 0xc, + }); + + try testType(@Vector(3, i5), .{ -1 << 4, -1, 0 }); + try testType(@Vector(3, u5), .{ 0, 1, 1 << 4 }); + + try testType(@Vector(3, i7), .{ -1 << 6, -1, 0 }); + try testType(@Vector(3, u7), .{ 0, 1, 1 << 6 }); + + try testType(@Vector(3, i8), .{ -1 << 7, -1, 0 }); + try testType(@Vector(3, u8), .{ 0, 1, 1 << 7 }); + try testType(@Vector(1, u8), .{ + 0x33, + }); + try testType(@Vector(2, u8), .{ + 0x66, 0x87, 
+ }); + try testType(@Vector(4, u8), .{ + 0x9d, 0xcb, 0x30, 0x7b, + }); + try testType(@Vector(8, u8), .{ + 0x4b, 0x35, 0x3f, 0x5c, 0xa5, 0x91, 0x23, 0x6d, + }); + try testType(@Vector(16, u8), .{ + 0xb7, 0x57, 0x27, 0x29, 0x58, 0xf8, 0xc9, 0x6c, 0xbe, 0x41, 0xf4, 0xd7, 0x4d, 0x01, 0xf0, 0x37, + }); + try testType(@Vector(32, u8), .{ + 0x5f, 0x61, 0x34, 0xe8, 0x37, 0x12, 0xba, 0x5a, 0x85, 0xf3, 0x3e, 0xa2, 0x0f, 0xd0, 0x65, 0xae, + 0xed, 0xf5, 0xe8, 0x65, 0x61, 0x28, 0x4a, 0x27, 0x2e, 0x01, 0x40, 0x8c, 0xe3, 0x36, 0x5d, 0xb6, + }); + try testType(@Vector(64, u8), .{ + 0xb0, 0x19, 0x5c, 0xc2, 0x3b, 0x16, 0x70, 0xad, 0x26, 0x45, 0xf2, 0xe1, 0x4f, 0x0f, 0x01, 0x72, + 0x7f, 0x1f, 0x07, 0x9e, 0xee, 0x9b, 0xb3, 0x38, 0x50, 0xf3, 0x56, 0x73, 0xd0, 0xd1, 0xee, 0xe3, + 0xeb, 0xf3, 0x1b, 0xe0, 0x77, 0x78, 0x75, 0xc6, 0x19, 0xe4, 0x69, 0xaa, 0x73, 0x08, 0xcd, 0x0c, + 0xf9, 0xed, 0x94, 0xf8, 0x79, 0x86, 0x63, 0x31, 0xbf, 0xd1, 0xe3, 0x17, 0x2b, 0xb9, 0xa1, 0x72, + }); + try testType(@Vector(128, u8), .{ + 0x2e, 0x93, 0x87, 0x09, 0x4f, 0x68, 0x14, 0xab, 0x3f, 0x04, 0x86, 0xc1, 0x95, 0xe8, 0x74, 0x11, + 0x57, 0x25, 0xe1, 0x88, 0xc0, 0x96, 0x33, 0x99, 0x15, 0x86, 0x2c, 0x84, 0x2e, 0xd7, 0x57, 0x21, + 0xd3, 0x18, 0xd5, 0x0e, 0xb4, 0x60, 0xe2, 0x08, 0xce, 0xbc, 0xd5, 0x4d, 0x8f, 0x59, 0x01, 0x67, + 0x71, 0x0a, 0x74, 0x48, 0xef, 0x39, 0x49, 0x7e, 0xa8, 0x39, 0x34, 0x75, 0x95, 0x3b, 0x38, 0xea, + 0x60, 0xd7, 0xed, 0x8f, 0xbb, 0xc0, 0x7d, 0xc2, 0x79, 0x2d, 0xbf, 0xa5, 0x64, 0xf4, 0x09, 0x86, + 0xfb, 0x29, 0xfe, 0xc7, 0xff, 0x62, 0x1a, 0x6f, 0xf8, 0xbd, 0xfe, 0xa4, 0xac, 0x24, 0xcf, 0x56, + 0x82, 0x69, 0x81, 0x0d, 0xc1, 0x51, 0x8d, 0x85, 0xf4, 0x00, 0xe7, 0x25, 0xab, 0xa5, 0x33, 0x45, + 0x66, 0x2e, 0x33, 0xc8, 0xf3, 0x35, 0x16, 0x7d, 0x1f, 0xc9, 0xf7, 0x44, 0xab, 0x66, 0x28, 0x0d, + }); + + try testType(@Vector(3, i9), .{ -1 << 8, -1, 0 }); + try testType(@Vector(3, u9), .{ 0, 1, 1 << 8 }); + + try testType(@Vector(3, i15), .{ -1 << 14, -1, 0 }); + try testType(@Vector(3, u15), .{ 0, 1, 1 << 14 }); + + try testType(@Vector(3, i16), .{ -1 << 15, -1, 0 }); + try testType(@Vector(3, u16), .{ 0, 1, 1 << 15 }); + try testType(@Vector(1, u16), .{ + 0x4da6, + }); + try testType(@Vector(2, u16), .{ + 0x04d7, 0x50c6, + }); + try testType(@Vector(4, u16), .{ + 0x4c06, 0xd71f, 0x4d8f, 0xe0a4, + }); + try testType(@Vector(8, u16), .{ + 0xee9a, 0x881d, 0x31fb, 0xd3f7, 0x2c74, 0x6949, 0x4e04, 0x53d7, + }); + try testType(@Vector(16, u16), .{ + 0xeafe, 0x9a7b, 0x0d6f, 0x18cb, 0xaf8f, 0x8ee4, 0xa47e, 0xd39a, + 0x6572, 0x9c53, 0xf36e, 0x982e, 0x41c1, 0x8682, 0xf5dc, 0x7e01, + }); + try testType(@Vector(32, u16), .{ + 0xdfb3, 0x7de6, 0xd9ed, 0xb42e, 0x95ac, 0x9b5b, 0x0422, 0xdfcd, + 0x6196, 0x4dbe, 0x1818, 0x8816, 0x75e7, 0xc9b0, 0x92f7, 0x1f71, + 0xe584, 0x576c, 0x043a, 0x0f31, 0xfc4c, 0x2c87, 0x6b02, 0x0229, + 0x25b7, 0x53cd, 0x9bab, 0x866b, 0x9008, 0xf0f3, 0xeb21, 0x88e2, + }); + try testType(@Vector(64, u16), .{ + 0x084c, 0x445f, 0xce89, 0xd3ee, 0xb399, 0x315d, 0x8ef8, 0x4f6f, + 0xf9af, 0xcbc4, 0x0332, 0xcd55, 0xa4dc, 0xbc38, 0x6e33, 0x8ead, + 0xd15a, 0x5057, 0x58ef, 0x657a, 0xe9f0, 0x1418, 0x2b62, 0x3387, + 0x1c15, 0x04e1, 0x0276, 0x3783, 0xad9c, 0xea9a, 0x0e5e, 0xe803, + 0x2ee7, 0x0cf1, 0x30f1, 0xb12a, 0x381b, 0x353d, 0xf637, 0xf853, + 0x2ac1, 0x7ce8, 0x6a50, 0xcbb8, 0xc9b8, 0x9b25, 0xd1e9, 0xeff0, + 0xc0a2, 0x8e51, 0xde7a, 0x4e58, 0x5685, 0xeb3f, 0xd29b, 0x66ed, + 0x3dd5, 0xcb59, 0x6003, 0xf710, 0x943a, 0x7276, 0xe547, 0xe48f, + }); + + try testType(@Vector(3, i17), .{ -1 << 16, -1, 0 }); + try testType(@Vector(3, u17), 
.{ 0, 1, 1 << 16 }); + + try testType(@Vector(3, i31), .{ -1 << 30, -1, 0 }); + try testType(@Vector(3, u31), .{ 0, 1, 1 << 30 }); + + try testType(@Vector(3, i32), .{ -1 << 31, -1, 0 }); + try testType(@Vector(3, u32), .{ 0, 1, 1 << 31 }); + try testType(@Vector(1, u32), .{ + 0x17e2805c, + }); + try testType(@Vector(2, u32), .{ + 0xdb6aadc5, 0xb1ff3754, + }); + try testType(@Vector(4, u32), .{ + 0xf7897b31, 0x342e1af9, 0x190fd76b, 0x283b5374, + }); + try testType(@Vector(8, u32), .{ + 0x81a0bd16, 0xc55da94e, 0x910f7e7c, 0x078d5ef7, + 0x0bdb1e4a, 0xf1a96e99, 0xcdd729b5, 0xe6966a1c, + }); + try testType(@Vector(16, u32), .{ + 0xfee812db, 0x29eacbed, 0xaed48136, 0x3053de13, + 0xbbda20df, 0x6faa274a, 0xe0b5ec3a, 0x1878b0dc, + 0x98204475, 0x810d8d05, 0x1e6996b6, 0xc543826a, + 0x53b47d8c, 0xc72c3142, 0x12f7e1f9, 0xf6782e54, + }); + try testType(@Vector(32, u32), .{ + 0xf0cf30d3, 0xe3c587b8, 0xcee44739, 0xe4a0bd72, + 0x41d44cce, 0x6d7c4259, 0xd85580a5, 0xec4b02d7, + 0xa366483d, 0x2d7b59d4, 0xe9c0ace4, 0x82cb441c, + 0xa23958ba, 0x04a70148, 0x3f0d20a3, 0xf9e21e37, + 0x009fce8b, 0x4a34a229, 0xf09c35cf, 0xc0977d4d, + 0xcc4d4647, 0xa30f1363, 0x27a65b14, 0xe572c785, + 0x8f42e320, 0x2b2cdeca, 0x11205bd4, 0x739d26aa, + 0xcbcc2df0, 0x5f7a3649, 0xbde1b7aa, 0x180a169f, + }); + + try testType(@Vector(3, i33), .{ -1 << 32, -1, 0 }); + try testType(@Vector(3, u33), .{ 0, 1, 1 << 32 }); + + try testType(@Vector(3, i63), .{ -1 << 62, -1, 0 }); + try testType(@Vector(3, u63), .{ 0, 1, 1 << 62 }); + + try testType(@Vector(3, i64), .{ -1 << 63, -1, 0 }); + try testType(@Vector(3, u64), .{ 0, 1, 1 << 63 }); + try testType(@Vector(1, u64), .{ + 0x7d2e439abb0edba7, + }); + try testType(@Vector(2, u64), .{ + 0x3749ee5a2d237b9f, 0x6d8f4c3e1378f389, + }); + try testType(@Vector(4, u64), .{ + 0x03c127040e10d52b, 0xa86fe019072e27eb, + 0x0a554a47b709cdba, 0xf4342cc597e196c3, + }); + try testType(@Vector(8, u64), .{ + 0xea455c104375a055, 0x5c35d9d945edb2fa, + 0xc11b73d9d9d546fc, 0x2a9d63aae838dd5b, + 0xed6603f1f5d574b3, 0x2f37b354c81c1e56, + 0xbe7f5e2476bc76bd, 0xb0c88eacfffa9a8f, + }); + try testType(@Vector(16, u64), .{ + 0x2258fc04b31f8dbe, 0x3a2e5483003a10d8, + 0xebf24b31c0460510, 0x15d5b4c09b53ffa5, + 0x05abf6e744b17cc6, 0x9747b483f2d159fe, + 0x4616d8b2c8673125, 0x8ae3f91d422447eb, + 0x18da2f101a9e9776, 0x77a1197fb0441007, + 0x4ba480c8ec2dd10b, 0xeb99b9c0a1725278, + 0xd9d0acc5084ecdf0, 0xa0a23317fff4f515, + 0x0901c59a9a6a408b, 0x7c77ca72e25df033, + }); + + try testType(@Vector(3, i65), .{ -1 << 64, -1, 0 }); + try testType(@Vector(3, u65), .{ 0, 1, 1 << 64 }); + + try testType(@Vector(3, i127), .{ -1 << 126, -1, 0 }); + try testType(@Vector(3, u127), .{ 0, 1, 1 << 126 }); + + try testType(@Vector(3, i128), .{ -1 << 127, -1, 0 }); + try testType(@Vector(3, u128), .{ 0, 1, 1 << 127 }); + try testType(@Vector(1, u128), .{ + 0x809f29e7fbafadc01145e1732590e7d9, + }); + try testType(@Vector(2, u128), .{ + 0x5150ac3438aacd0d51132cc2723b2995, + 0x151be9c47ad29cf719cf8358dd40165c, + }); + try testType(@Vector(4, u128), .{ + 0x4bae22df929f2f7cb9bd84deaad3e7a8, + 0x1ed46b2d6e1f3569f56b2ac33d8bc1cb, + 0xae93ea459d2ccfd5fb794e6d5c31aabb, + 0xb1177136acf099f550b70949ac202ec4, + }); + try testType(@Vector(8, u128), .{ + 0x7cd78db6baed6bfdf8c5265136c4e0fd, + 0xa41b8984c6bbde84640068194b7eba98, + 0xd33102778f2ae1a48d1e9bf8801bbbf0, + 0x0d59f6de003513a60055c86cbce2c200, + 0x825579d90012afddfbf04851c0748561, + 0xc2647c885e9d6f0ee1f5fac5da8ef7f5, + 0xcb4bbc1f81aa8ee68aa4dc140745687b, + 0x4ff10f914f74b46c694407f5bf7c7836, + }); + + try 
testType(@Vector(3, i129), .{ -1 << 128, -1, 0 }); + try testType(@Vector(3, u129), .{ 0, 1, 1 << 128 }); + + try testType(@Vector(3, i191), .{ -1 << 190, -1, 0 }); + try testType(@Vector(3, u191), .{ 0, 1, 1 << 190 }); + + try testType(@Vector(3, i192), .{ -1 << 191, -1, 0 }); + try testType(@Vector(3, u192), .{ 0, 1, 1 << 191 }); + try testType(@Vector(1, u192), .{ + 0xe7baafcb9781626a77571b0539b9471a60c97d6c02106c8b, + }); + try testType(@Vector(2, u192), .{ + 0xbc9510913ed09e2c2aa50ffab9f1bc7b303a87f36e232a83, + 0x1f37bee446d7712d1ad457c47a66812cb926198d052aee65, + }); + try testType(@Vector(4, u192), .{ + 0xdca6a7cfc19c69efc34022062a8ca36f2569ab3dce001202, + 0xd25a4529e621c9084181fdb6917c6a32eccc58b63601b35d, + 0x0a258afd6debbaf8c158f1caa61fed63b31871d13f51b43d, + 0x6b40a178674fcb82c623ac322f851623d5e993dac97a219a, + }); + + try testType(@Vector(3, i193), .{ -1 << 192, -1, 0 }); + try testType(@Vector(3, u193), .{ 0, 1, 1 << 192 }); + + try testType(@Vector(3, i255), .{ -1 << 254, -1, 0 }); + try testType(@Vector(3, u255), .{ 0, 1, 1 << 254 }); + + try testType(@Vector(3, i256), .{ -1 << 255, -1, 0 }); + try testType(@Vector(3, u256), .{ 0, 1, 1 << 255 }); + try testType(@Vector(1, u256), .{ + 0x230413bb481fa3a997796acf282010c560d1942e7339fd584a0f15a90c83fbda, + }); + try testType(@Vector(2, u256), .{ + 0x3ad569f8d91fdbc9da8ec0e933565919f2feb90b996c90c352b461aa0908e62d, + 0x0f109696d64647983f1f757042515510729ad1350e862cbf38cb73b5cf99f0f7, + }); + try testType(@Vector(4, u256), .{ + 0x1717c6ded4ac6de282d59f75f068da47d5a47a30f2c5053d2d59e715f9d28b97, + 0x3087189ce7540e2e0028b80af571ebc6353a00b2917f243a869ed29ecca0adaa, + 0x1507c6a9d104684bf503cdb08841cf91adab4644306bd67aafff5326604833ce, + 0x857e134ff9179733c871295b25f824bd3eb562977bad30890964fa0cdc15bb07, + }); + + try testType(@Vector(3, i257), .{ -1 << 256, -1, 0 }); + try testType(@Vector(3, u257), .{ 0, 1, 1 << 256 }); + + try testType(@Vector(3, i511), .{ -1 << 510, -1, 0 }); + try testType(@Vector(3, u511), .{ 0, 1, 1 << 510 }); + + try testType(@Vector(3, i512), .{ -1 << 511, -1, 0 }); + try testType(@Vector(3, u512), .{ 0, 1, 1 << 511 }); + try testType(@Vector(1, u512), .{ + 0xa3ff51a609f1370e5eeb96b05169bf7469e465cf76ac5b4ea8ffd166c1ba3cd94f2dedf0d647a1fe424f3a06e6d7940f03e257f28100970b00bd5528c52b9ae6, + }); + try testType(@Vector(2, u512), .{ + 0xc6d43cd46ae31ab71f9468a895c83bf17516c6b2f1c9b04b9aa113bf7fe1b789eb7d95fcf951f12a9a6f2124589551efdd8c00f528b366a7bfb852faf8f3da53, + 0xc9099d2bdf8d1a0d30485ec6db4a24cbc0d89a863de30e18313ee1d66f71dd2d26235caaa703286cf4a2b51e1a12ef96d2d944c66c0bd3f0d72dd4cf0fc8100e, + }); + + try testType(@Vector(3, i513), .{ -1 << 512, -1, 0 }); + try testType(@Vector(3, u513), .{ 0, 1, 1 << 512 }); + + try testType(@Vector(3, i1023), .{ -1 << 1022, -1, 0 }); + try testType(@Vector(3, u1023), .{ 0, 1, 1 << 1022 }); + + try testType(@Vector(3, i1024), .{ -1 << 1023, -1, 0 }); + try testType(@Vector(3, u1024), .{ 0, 1, 1 << 1023 }); + try testType(@Vector(1, u1024), .{ + 0xc6cfaa6571139552e1f067402dfc131d9b9a58aafda97198a78764b05138fb68cf26f085b7652f3d5ae0e56aa21732f296a581bb411d4a73795c213de793489fa49b173b9f5c089aa6295ff1fcdc14d491a05035b45d08fc35cd67a83d887a02b8db512f07518132e0ba56533c7d6fbe958255eddf5649bd8aba288c0dd84a25, + }); + + try testType(@Vector(3, i1025), .{ -1 << 1024, -1, 0 }); + try testType(@Vector(3, u1025), .{ 0, 1, 1 << 1024 }); +} + fn testBinary(comptime op: anytype) !void { const testType = struct { fn testType(comptime Type: type, comptime imm_lhs: Type, comptime imm_rhs: Type) 
!void { @@ -306,6 +1012,63 @@ fn testBinary(comptime op: anytype) !void { 0x8b0b4a27fc94a0e90652d19bc755b63d, 0xa858bce5ad0e48c13588a4e170e8667c, }); + + try testType(@Vector(1, u256), .{ + 0x28df37e1f57a56133ba3f5b5b2164ce24eb6c29a8973a597fd91fbee8ab4bafb, + }, .{ + 0x63f725028cab082b5b1e6cb474428c8c3655cf438f3bb05c7a87f8270198f357, + }); + try testType(@Vector(2, u256), .{ + 0xcc79740b85597ef411e6d7e92049dfaa2328781ea4911540a3dcb512b71c7f3c, + 0x51ae46d2f93cbecff1578481f6ddc633dacee94ecaf81597c752c5c5db0ae766, + }, .{ + 0x257f0107305cb71cef582a9a58612a019f335e390d7998f51f5898f245874a6e, + 0x0a95a17323a4d16a715720f122b752785e9877e3dd3d3f9b72cdac3d1139a81f, + }); + try testType(@Vector(4, u256), .{ + 0x19667a6e269342cba437a8904c7ba42a762358d32723723ae2637b01124e63c5, + 0x14f7d3599a7edc7bcc46874f68d4291793e6ef72bd1f3763bc5e923f54f2f781, + 0x1c939de0ae980b80de773a04088ba45813441336cdfdc281ee356c98d71f653b, + 0x39f5d755965382fe13d1b1d6690b8e3827f153f8166768c4ad8a28a963b781f2, + }, .{ + 0xbe03de37cdcb8126083b4e86cd8a9803121d31b186fd5ce555ad77ce624dd6c7, + 0xa0c0730f0d7f141cc959849d09730b049f00693361539f1bc4758270554a60c1, + 0x2664bdba8de4eaa36ecee72f6bfec5b4daa6b4e00272d8116f2cc532c29490cc, + 0xe47a122bd45d5e7d69722d864a6b795ddee965a0993094f8791dd309d692de8b, + }); + + try testType(@Vector(1, u512), .{ + 0x651058c1d89a8f34cfc5e66b6d25294eecfcc4a7e1e4a356eb51ee7d7b2db25378e4afee51b7d18d16e520772a60c50a02d7966f40ced1870b32c658e5821397, + }, .{ + 0xd726e265ec80cb99510ba4f480ca64e959de5c528a7f54c386ecad22eeeefa845f0fd44b1bd64258a5f868197ee2d8fed59df9c9f0b72e74051a7ff20230880e, + }); + try testType(@Vector(2, u512), .{ + 0x22c8183c95cca8b09fdf541e431b73e9e4a1a5a00dff12381937fab52681d09d38ea25727d7025a2be08942cfa01535759e1644792e347c7901ec94b343c6337, + 0x292fdf644e75927e1aea9465ae2f60fb27550cd095f1afdea2cf7855286d26fbeed1c0b9c0474b73cb6b75621f7eadaa2f94ec358179ce2aaa0766df20da1ef3, + }, .{ + 0xe1cd8c0ca244c6626d4415e10b4ac43fa69e454c529c24fec4b13e6b945684d4ea833709c16c636ca78cffa5c5bf0fe945cd714a9ad695184a6bdad31dec9e31, + 0x8fa3d86099e9e2789d72f8e792290356d659ab20ac0414ff94745984c6ae7d986082197bb849889f912e896670aa2c1a11bd7e66e3f650710b0f0a18a1533f90, + }); + + try testType(@Vector(1, u1024), .{ + 0x0ca1a0dfaf8bb1da714b457d23c71aef948e66c7cd45c0aa941498a796fb18502ec32f34e885d0a107d44ae81595f8b52c2f0fb38e584b7139903a0e8a823ae20d01ca0662722dd474e7efc40f32d74cc065d97d8a09d0447f1ab6107fa0a57f3f8c866ae872506627ce82f18add79cee8dc69837f4ead3ca770c4d622d7e544, + }, .{ + 0xf1e3bbe031d59351770a7a501b6e969b2c00d144f17648db3f944b69dfeb7be72e5ff933a061eba4eaa422f8ca09e5a97d0b0dd740fd4076eba8c72d7a278523f399202dc2d043c4e0eb58a2bcd4066e2146e321810b1ee4d3afdddb4f026bcc7905ce17e033a7727b4e08f33b53c63d8c9f763fc6c31d0523eb38c30d5e40bc, + }); +} + +inline fn bitNot(comptime Type: type, rhs: Type) @TypeOf(~rhs) { + return ~rhs; +} +test bitNot { + try testUnary(bitNot); +} + +inline fn clz(comptime Type: type, rhs: Type) @TypeOf(@clz(rhs)) { + return @clz(rhs); +} +test clz { + try testUnary(clz); } inline fn bitAnd(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs & rhs) { diff --git a/tools/update_cpu_features.zig b/tools/update_cpu_features.zig index da0aa3f565..065d40d3f4 100644 --- a/tools/update_cpu_features.zig +++ b/tools/update_cpu_features.zig @@ -902,8 +902,8 @@ const llvm_targets = [_]LlvmTarget{ .features = &.{ "v8a", "exynos" }, }, }, - // LLVM removed support for v2 and v3 but zig wants to support targeting old hardware .extra_features = &.{ + // LLVM removed support for v2 and v3 but zig wants to 
support targeting old hardware .{ .zig_name = "v2", .desc = "ARMv2 architecture", @@ -1043,10 +1043,22 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "64bit-mode", .omit = true, }, + .{ + .llvm_name = "alderlake", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "amdfam10", .extra_deps = &.{"3dnowa"}, }, + .{ + .llvm_name = "arrowlake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "arrowlake-s", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "athlon", .extra_deps = &.{"3dnowa"}, @@ -1081,16 +1093,64 @@ const llvm_targets = [_]LlvmTarget{ }, .{ .llvm_name = "barcelona", - .extra_deps = &.{"3dnowa"}, + .extra_deps = &.{ "3dnowa", "smap", "smep" }, + }, + .{ + .llvm_name = "broadwell", + .extra_deps = &.{ "smap", "smep" }, }, .{ .llvm_name = "c3", .extra_deps = &.{"3dnow"}, }, + .{ + .llvm_name = "cannonlake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "cascadelake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "emeraldrapids", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "geode", .extra_deps = &.{"3dnowa"}, }, + .{ + .llvm_name = "goldmont", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "goldmont_plus", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "haswell", + .extra_deps = &.{"smep"}, + }, + .{ + .llvm_name = "i386", + .extra_deps = &.{"bsf_bsr_0_clobbers_result"}, + }, + .{ + .llvm_name = "i486", + .extra_deps = &.{"bsf_bsr_0_clobbers_result"}, + }, + .{ + .llvm_name = "icelake_client", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "icelake_server", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "ivybridge", + .extra_deps = &.{"smep"}, + }, .{ .llvm_name = "k6-2", .extra_deps = &.{"3dnow"}, @@ -1127,6 +1187,10 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "lakemont", .extra_deps = &.{"soft_float"}, }, + .{ + .llvm_name = "meteorlake", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "opteron", .extra_deps = &.{"3dnowa"}, @@ -1135,6 +1199,38 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "opteron-sse3", .extra_deps = &.{"3dnowa"}, }, + .{ + .llvm_name = "raptorlake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "rocketlake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "sapphirerapids", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "silvermont", + .extra_deps = &.{"smep"}, + }, + .{ + .llvm_name = "skx", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "skylake", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "skylake_avx512", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "tigerlake", + .extra_deps = &.{ "smap", "smep" }, + }, .{ .llvm_name = "winchip2", .extra_deps = &.{"3dnow"}, @@ -1143,9 +1239,29 @@ const llvm_targets = [_]LlvmTarget{ .llvm_name = "sse4.2", .extra_deps = &.{"crc32"}, }, + .{ + .llvm_name = "znver1", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "znver2", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "znver3", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "znver4", + .extra_deps = &.{ "smap", "smep" }, + }, + .{ + .llvm_name = "znver5", + .extra_deps = &.{ "smap", "smep" }, + }, }, - // Features removed from LLVM .extra_features = &.{ + // Features removed from LLVM .{ .zig_name = "3dnow", .desc = "Enable 3DNow! 
instructions", @@ -1171,6 +1287,22 @@ const llvm_targets = [_]LlvmTarget{ .desc = "Prefetch with Intent to Write and T1 Hint", .deps = &.{}, }, + // Custom Zig features + .{ + .zig_name = "bsf_bsr_0_clobbers_result", + .desc = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero", + .deps = &.{}, + }, + .{ + .zig_name = "smap", + .desc = "Enable Supervisor Mode Access Prevention", + .deps = &.{}, + }, + .{ + .zig_name = "smep", + .desc = "Enable Supervisor Mode Execution Prevention", + .deps = &.{}, + }, }, .omit_cpus = &.{ // LLVM defines a bunch of dumb aliases with foreach loops in X86.td. From e5d5a8bc4ea6b27dc3540ad4800a1231ff50b33d Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 2 Jan 2025 03:10:19 -0500 Subject: [PATCH 10/25] x86_64: implement switch jump tables --- lib/std/Thread/Condition.zig | 6 +- lib/std/Thread/Mutex.zig | 2 +- lib/std/debug.zig | 6 +- lib/std/debug/SelfInfo.zig | 8 +- lib/std/heap.zig | 2 +- lib/std/math/big/int.zig | 2 +- lib/std/os.zig | 2 +- lib/std/os/windows.zig | 2 +- lib/std/posix.zig | 2 +- lib/std/posix/test.zig | 2 +- lib/std/simd.zig | 10 +- lib/std/zig/system/NativePaths.zig | 2 +- src/Liveness.zig | 19 +- src/arch/x86_64/CodeGen.zig | 299 +++++++++++++++++++++--- src/arch/x86_64/Emit.zig | 92 +++++--- src/arch/x86_64/Lower.zig | 15 +- src/arch/x86_64/Mir.zig | 9 +- src/arch/x86_64/bits.zig | 5 +- src/arch/x86_64/encoder.zig | 103 ++++---- src/link/MachO.zig | 2 +- test/behavior/align.zig | 20 +- test/behavior/asm.zig | 2 +- test/behavior/call.zig | 1 + test/behavior/cast.zig | 4 +- test/behavior/eval.zig | 2 +- test/behavior/math.zig | 8 +- test/behavior/maximum_minimum.zig | 2 +- test/behavior/muladd.zig | 8 +- test/behavior/saturating_arithmetic.zig | 10 +- test/behavior/struct.zig | 8 +- test/behavior/var_args.zig | 10 +- test/behavior/vector.zig | 8 +- test/behavior/wrapping_arithmetic.zig | 2 +- 33 files changed, 476 insertions(+), 199 deletions(-) diff --git a/lib/std/Thread/Condition.zig b/lib/std/Thread/Condition.zig index e6c25d761c..65bfa32ad0 100644 --- a/lib/std/Thread/Condition.zig +++ b/lib/std/Thread/Condition.zig @@ -161,17 +161,17 @@ const WindowsImpl = struct { } } - if (comptime builtin.mode == .Debug) { + if (builtin.mode == .Debug) { // The internal state of the DebugMutex needs to be handled here as well. mutex.impl.locking_thread.store(0, .unordered); } const rc = os.windows.kernel32.SleepConditionVariableSRW( &self.condition, - if (comptime builtin.mode == .Debug) &mutex.impl.impl.srwlock else &mutex.impl.srwlock, + if (builtin.mode == .Debug) &mutex.impl.impl.srwlock else &mutex.impl.srwlock, timeout_ms, 0, // the srwlock was assumed to acquired in exclusive mode not shared ); - if (comptime builtin.mode == .Debug) { + if (builtin.mode == .Debug) { // The internal state of the DebugMutex needs to be handled here as well. 
mutex.impl.locking_thread.store(std.Thread.getCurrentId(), .unordered); } diff --git a/lib/std/Thread/Mutex.zig b/lib/std/Thread/Mutex.zig index be421c4c94..402c96a4d5 100644 --- a/lib/std/Thread/Mutex.zig +++ b/lib/std/Thread/Mutex.zig @@ -158,7 +158,7 @@ const FutexImpl = struct { // On x86, use `lock bts` instead of `lock cmpxchg` as: // - they both seem to mark the cache-line as modified regardless: https://stackoverflow.com/a/63350048 // - `lock bts` is smaller instruction-wise which makes it better for inlining - if (comptime builtin.target.cpu.arch.isX86()) { + if (builtin.target.cpu.arch.isX86()) { const locked_bit = @ctz(locked); return self.state.bitSet(locked_bit, .acquire) == 0; } diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 3664bd0cef..02eb60d6a7 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -179,7 +179,7 @@ pub fn dumpHexFallible(bytes: []const u8) !void { /// TODO multithreaded awareness pub fn dumpCurrentStackTrace(start_addr: ?usize) void { nosuspend { - if (comptime builtin.target.isWasm()) { + if (builtin.target.isWasm()) { if (native_os == .wasi) { const stderr = io.getStdErr().writer(); stderr.print("Unable to dump stack trace: not implemented for Wasm\n", .{}) catch return; @@ -267,7 +267,7 @@ pub inline fn getContext(context: *ThreadContext) bool { /// TODO multithreaded awareness pub fn dumpStackTraceFromBase(context: *ThreadContext) void { nosuspend { - if (comptime builtin.target.isWasm()) { + if (builtin.target.isWasm()) { if (native_os == .wasi) { const stderr = io.getStdErr().writer(); stderr.print("Unable to dump stack trace: not implemented for Wasm\n", .{}) catch return; @@ -365,7 +365,7 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT /// TODO multithreaded awareness pub fn dumpStackTrace(stack_trace: std.builtin.StackTrace) void { nosuspend { - if (comptime builtin.target.isWasm()) { + if (builtin.target.isWasm()) { if (native_os == .wasi) { const stderr = io.getStdErr().writer(); stderr.print("Unable to dump stack trace: not implemented for Wasm\n", .{}) catch return; diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 544cf0ac6f..4dd0b4e842 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -121,13 +121,13 @@ pub fn deinit(self: *SelfInfo) void { } pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { - if (comptime builtin.target.isDarwin()) { + if (builtin.target.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { return self.lookupModuleWin32(address); } else if (native_os == .haiku) { return self.lookupModuleHaiku(address); - } else if (comptime builtin.target.isWasm()) { + } else if (builtin.target.isWasm()) { return self.lookupModuleWasm(address); } else { return self.lookupModuleDl(address); @@ -138,13 +138,13 @@ pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { // This can be called when getModuleForAddress fails, so implementations should provide // a path that doesn't rely on any side-effects of a prior successful module lookup. 
pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { - if (comptime builtin.target.isDarwin()) { + if (builtin.target.isDarwin()) { return self.lookupModuleNameDyld(address); } else if (native_os == .windows) { return self.lookupModuleNameWin32(address); } else if (native_os == .haiku) { return null; - } else if (comptime builtin.target.isWasm()) { + } else if (builtin.target.isWasm()) { return null; } else { return self.lookupModuleNameDl(address); diff --git a/lib/std/heap.zig b/lib/std/heap.zig index 3d19d8daa6..33f79e265a 100644 --- a/lib/std/heap.zig +++ b/lib/std/heap.zig @@ -890,7 +890,7 @@ test { _ = @import("heap/memory_pool.zig"); _ = ArenaAllocator; _ = GeneralPurposeAllocator; - if (comptime builtin.target.isWasm()) { + if (builtin.target.isWasm()) { _ = WasmAllocator; _ = WasmPageAllocator; } diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 98d37d8994..2549644dbc 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -2523,7 +2523,7 @@ pub const Const = struct { /// Returns the number of leading zeros in twos-complement form. pub fn clz(a: Const, bits: Limb) Limb { // Limbs are stored in little-endian order but we need to iterate big-endian. - if (!a.positive) return 0; + if (!a.positive and !a.eqlZero()) return 0; var total_limb_lz: Limb = 0; var i: usize = a.limbs.len; const bits_per_limb = @bitSizeOf(Limb); diff --git a/lib/std/os.zig b/lib/std/os.zig index 27e6577111..80f45dd59d 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -157,7 +157,7 @@ pub fn getFdPath(fd: std.posix.fd_t, out_buffer: *[max_path_bytes]u8) std.posix. return target; }, .freebsd => { - if (comptime builtin.os.isAtLeast(.freebsd, .{ .major = 13, .minor = 0, .patch = 0 }) orelse false) { + if (builtin.os.isAtLeast(.freebsd, .{ .major = 13, .minor = 0, .patch = 0 }) orelse false) { var kfile: std.c.kinfo_file = undefined; kfile.structsize = std.c.KINFO_FILE_SIZE; switch (posix.errno(std.c.fcntl(fd, std.c.F.KINFO, @intFromPtr(&kfile)))) { diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index ceed0618d1..cfc2403800 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -1061,7 +1061,7 @@ pub fn DeleteFile(sub_path_w: []const u16, options: DeleteFileOptions) DeleteFil // us INVALID_PARAMETER. // The same reasoning for win10_rs5 as in os.renameatW() applies (FILE_DISPOSITION_IGNORE_READONLY_ATTRIBUTE requires >= win10_rs5). var need_fallback = true; - if (comptime builtin.target.os.version_range.windows.min.isAtLeast(.win10_rs5)) { + if (builtin.target.os.version_range.windows.min.isAtLeast(.win10_rs5)) { // Deletion with posix semantics if the filesystem supports it. 
var info = FILE_DISPOSITION_INFORMATION_EX{ .Flags = FILE_DISPOSITION_DELETE | diff --git a/lib/std/posix.zig b/lib/std/posix.zig index f07421fdc1..f99875437d 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -6819,7 +6819,7 @@ pub fn memfd_createZ(name: [*:0]const u8, flags: u32) MemFdCreateError!fd_t { } }, .freebsd => { - if (comptime builtin.os.version_range.semver.max.order(.{ .major = 13, .minor = 0, .patch = 0 }) == .lt) + if (builtin.os.version_range.semver.max.order(.{ .major = 13, .minor = 0, .patch = 0 }) == .lt) @compileError("memfd_create is unavailable on FreeBSD < 13.0"); const rc = system.memfd_create(name, flags); switch (errno(rc)) { diff --git a/lib/std/posix/test.zig b/lib/std/posix/test.zig index 653637c0a7..89346e66fe 100644 --- a/lib/std/posix/test.zig +++ b/lib/std/posix/test.zig @@ -804,7 +804,7 @@ test "getrlimit and setrlimit" { // // This happens for example if RLIMIT_MEMLOCK is bigger than ~2GiB. // In that case the following the limit would be RLIM_INFINITY and the following setrlimit fails with EPERM. - if (comptime builtin.cpu.arch.isMIPS() and builtin.link_libc) { + if (builtin.cpu.arch.isMIPS() and builtin.link_libc) { if (limit.cur != linux.RLIM.INFINITY) { try posix.setrlimit(resource, limit); } diff --git a/lib/std/simd.zig b/lib/std/simd.zig index e06a873f09..b4aef7246c 100644 --- a/lib/std/simd.zig +++ b/lib/std/simd.zig @@ -163,7 +163,7 @@ pub fn interlace(vecs: anytype) @Vector(vectorLength(@TypeOf(vecs[0])) * vecs.le // The indices are correct. The problem seems to be with the @shuffle builtin. // On MIPS, the test that interlaces small_base gives { 0, 2, 0, 0, 64, 255, 248, 200, 0, 0 }. // Calling this with two inputs seems to work fine, but I'll let the compile error trigger for all inputs, just to be safe. - comptime if (builtin.cpu.arch.isMIPS()) @compileError("TODO: Find out why interlace() doesn't work on MIPS"); + if (builtin.cpu.arch.isMIPS()) @compileError("TODO: Find out why interlace() doesn't work on MIPS"); const VecType = @TypeOf(vecs[0]); const vecs_arr = @as([vecs.len]VecType, vecs); @@ -248,7 +248,7 @@ test "vector patterns" { try std.testing.expectEqual([8]u32{ 10, 20, 30, 40, 55, 66, 77, 88 }, join(base, other_base)); try std.testing.expectEqual([2]u32{ 20, 30 }, extract(base, 1, 2)); - if (comptime !builtin.cpu.arch.isMIPS()) { + if (!builtin.cpu.arch.isMIPS()) { try std.testing.expectEqual([8]u32{ 10, 55, 20, 66, 30, 77, 40, 88 }, interlace(.{ base, other_base })); const small_braid = interlace(small_bases); @@ -390,7 +390,7 @@ pub fn prefixScanWithFunc( comptime identity: std.meta.Child(@TypeOf(vec)), ) if (ErrorType == void) @TypeOf(vec) else ErrorType!@TypeOf(vec) { // I haven't debugged this, but it might be a cousin of sorts to what's going on with interlace. 
- comptime if (builtin.cpu.arch.isMIPS()) @compileError("TODO: Find out why prefixScan doesn't work on MIPS"); + if (builtin.cpu.arch.isMIPS()) @compileError("TODO: Find out why prefixScan doesn't work on MIPS"); const len = vectorLength(@TypeOf(vec)); @@ -465,9 +465,7 @@ test "vector prefix scan" { if ((builtin.cpu.arch == .armeb or builtin.cpu.arch == .thumbeb) and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/22060 if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21893 - if (comptime builtin.cpu.arch.isMIPS()) { - return error.SkipZigTest; - } + if (builtin.cpu.arch.isMIPS()) return error.SkipZigTest; const int_base = @Vector(4, i32){ 11, 23, 9, -21 }; const float_base = @Vector(4, f32){ 2, 0.5, -10, 6.54321 }; diff --git a/lib/std/zig/system/NativePaths.zig b/lib/std/zig/system/NativePaths.zig index 3c96134556..be8e7b05dd 100644 --- a/lib/std/zig/system/NativePaths.zig +++ b/lib/std/zig/system/NativePaths.zig @@ -83,7 +83,7 @@ pub fn detect(arena: Allocator, native_target: std.Target) !NativePaths { // TODO: consider also adding homebrew paths // TODO: consider also adding macports paths - if (comptime builtin.target.isDarwin()) { + if (builtin.target.isDarwin()) { if (std.zig.system.darwin.isSdkInstalled(arena)) sdk: { const sdk = std.zig.system.darwin.getSdk(arena, native_target) orelse break :sdk; try self.addLibDir(try std.fs.path.join(arena, &.{ sdk, "usr/lib" })); diff --git a/src/Liveness.zig b/src/Liveness.zig index e6ed782a21..e6e9c07363 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -719,32 +719,25 @@ pub const SwitchBrTable = struct { /// Caller owns the memory. pub fn getSwitchBr(l: Liveness, gpa: Allocator, inst: Air.Inst.Index, cases_len: u32) Allocator.Error!SwitchBrTable { - var index: usize = l.special.get(inst) orelse return SwitchBrTable{ - .deaths = &.{}, - }; + var index: usize = l.special.get(inst) orelse return .{ .deaths = &.{} }; const else_death_count = l.extra[index]; index += 1; - var deaths = std.ArrayList([]const Air.Inst.Index).init(gpa); - defer deaths.deinit(); - try deaths.ensureTotalCapacity(cases_len + 1); + var deaths = try gpa.alloc([]const Air.Inst.Index, cases_len); + errdefer gpa.free(deaths); var case_i: u32 = 0; while (case_i < cases_len - 1) : (case_i += 1) { const case_death_count: u32 = l.extra[index]; index += 1; - const case_deaths: []const Air.Inst.Index = @ptrCast(l.extra[index..][0..case_death_count]); + deaths[case_i] = @ptrCast(l.extra[index..][0..case_death_count]); index += case_death_count; - deaths.appendAssumeCapacity(case_deaths); } { // Else - const else_deaths: []const Air.Inst.Index = @ptrCast(l.extra[index..][0..else_death_count]); - deaths.appendAssumeCapacity(else_deaths); + deaths[case_i] = @ptrCast(l.extra[index..][0..else_death_count]); } - return SwitchBrTable{ - .deaths = try deaths.toOwnedSlice(), - }; + return .{ .deaths = deaths }; } /// Note that this information is technically redundant, but is useful for diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d2d1fedb6f..79e69c7c07 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -61,9 +61,10 @@ src_loc: Zcu.LazySrcLoc, eflags_inst: ?Air.Inst.Index = null, /// MIR Instructions -mir_instructions: std.MultiArrayList(Mir.Inst) = .{}, +mir_instructions: std.MultiArrayList(Mir.Inst) = .empty, /// MIR extra data mir_extra: std.ArrayListUnmanaged(u32) = 
.empty, +mir_table: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty, /// Byte offset within the source file of the ending curly. end_di_line: u32, @@ -75,8 +76,8 @@ end_di_column: u32, exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty, reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined, -const_tracking: ConstTrackingMap = .{}, -inst_tracking: InstTrackingMap = .{}, +const_tracking: ConstTrackingMap = .empty, +inst_tracking: InstTrackingMap = .empty, // Key is the block instruction blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .empty, @@ -86,16 +87,26 @@ register_manager: RegisterManager = .{}, /// Generation of the current scope, increments by 1 for every entered scope. scope_generation: u32 = 0, -frame_allocs: std.MultiArrayList(FrameAlloc) = .{}, +frame_allocs: std.MultiArrayList(FrameAlloc) = .empty, free_frame_indices: std.AutoArrayHashMapUnmanaged(FrameIndex, void) = .empty, -frame_locs: std.MultiArrayList(Mir.FrameLoc) = .{}, +frame_locs: std.MultiArrayList(Mir.FrameLoc) = .empty, loops: std.AutoHashMapUnmanaged(Air.Inst.Index, struct { /// The state to restore before branching. state: State, /// The branch target. target: Mir.Inst.Index, -}) = .{}, +}) = .empty, +loop_switches: std.AutoHashMapUnmanaged(Air.Inst.Index, struct { + start: u31, + len: u11, + min: Value, + else_relocs: union(enum) { + @"unreachable", + forward: std.ArrayListUnmanaged(Mir.Inst.Index), + backward: Mir.Inst.Index, + }, +}) = .empty, next_temp_index: Temp.Index = @enumFromInt(0), temp_type: [Temp.Index.max]Type = undefined, @@ -904,6 +915,7 @@ pub fn generate( function.free_frame_indices.deinit(gpa); function.frame_locs.deinit(gpa); function.loops.deinit(gpa); + function.loop_switches.deinit(gpa); var block_it = function.blocks.valueIterator(); while (block_it.next()) |block| block.deinit(gpa); function.blocks.deinit(gpa); @@ -912,6 +924,7 @@ pub fn generate( function.exitlude_jump_relocs.deinit(gpa); function.mir_instructions.deinit(gpa); function.mir_extra.deinit(gpa); + function.mir_table.deinit(gpa); } try function.inst_tracking.ensureTotalCapacity(gpa, Temp.Index.max); for (0..Temp.Index.max) |temp_index| { @@ -978,6 +991,7 @@ pub fn generate( var mir: Mir = .{ .instructions = function.mir_instructions.toOwnedSlice(), .extra = try function.mir_extra.toOwnedSlice(gpa), + .table = try function.mir_table.toOwnedSlice(gpa), .frame_locs = function.frame_locs.toOwnedSlice(), }; defer mir.deinit(gpa); @@ -1012,7 +1026,6 @@ pub fn generate( }, .prev_di_pc = 0, }; - defer emit.deinit(); emit.emitMir() catch |err| switch (err) { error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?), @@ -1056,6 +1069,7 @@ pub fn generateLazy( defer { function.mir_instructions.deinit(gpa); function.mir_extra.deinit(gpa); + function.mir_table.deinit(gpa); } function.genLazy(lazy_sym) catch |err| switch (err) { @@ -1067,6 +1081,7 @@ pub fn generateLazy( var mir: Mir = .{ .instructions = function.mir_instructions.toOwnedSlice(), .extra = try function.mir_extra.toOwnedSlice(gpa), + .table = try function.mir_table.toOwnedSlice(gpa), .frame_locs = function.frame_locs.toOwnedSlice(), }; defer mir.deinit(gpa); @@ -1093,7 +1108,6 @@ pub fn generateLazy( .prev_di_loc = undefined, // no debug info yet .prev_di_pc = undefined, // no debug info yet }; - defer emit.deinit(); emit.emitMir() catch |err| switch (err) { error.LowerFail, error.EmitFail => return function.failMsg(emit.lower.err_msg.?), error.InvalidInstruction => return function.fail("failed to find a viable 
x86 instruction (Zig compiler bug)", .{}), @@ -1161,6 +1175,7 @@ fn formatWipMir( .mir = .{ .instructions = data.self.mir_instructions.slice(), .extra = data.self.mir_extra.items, + .table = data.self.mir_table.items, .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(), }, .cc = .auto, @@ -20748,25 +20763,195 @@ fn lowerBlock(self: *CodeGen, inst: Air.Inst.Index, body: []const Air.Inst.Index self.getValueIfFree(tracking.short, inst); } -fn lowerSwitchBr(self: *CodeGen, inst: Air.Inst.Index, switch_br: Air.UnwrappedSwitch, condition: MCValue) !void { +fn lowerSwitchBr( + self: *CodeGen, + inst: Air.Inst.Index, + switch_br: Air.UnwrappedSwitch, + condition: MCValue, + condition_dies: bool, + is_loop: bool, +) !void { const zcu = self.pt.zcu; const condition_ty = self.typeOf(switch_br.operand); - const liveness = try self.liveness.getSwitchBr(self.gpa, inst, switch_br.cases_len + 1); - defer self.gpa.free(liveness.deaths); - const signedness = switch (condition_ty.zigTypeTag(zcu)) { - .bool, .pointer => .unsigned, - .int, .@"enum", .error_set => condition_ty.intInfo(zcu).signedness, - else => unreachable, + const ExpectedContents = extern struct { + liveness_deaths: [1 << 8 | 1]Air.Inst.Index, + bigint_limbs: [std.math.big.int.calcTwosCompLimbCount(1 << 8)]std.math.big.Limb, + relocs: [1 << 6]Mir.Inst.Index, }; + var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = + std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); + const allocator = stack.get(); self.scope_generation += 1; const state = try self.saveState(); - var it = switch_br.iterateCases(); - while (it.next()) |case| { - var relocs = try self.gpa.alloc(Mir.Inst.Index, case.items.len + case.ranges.len); - defer self.gpa.free(relocs); + const liveness = try self.liveness.getSwitchBr(allocator, inst, switch_br.cases_len + 1); + defer allocator.free(liveness.deaths); + + if (!self.mod.pic and self.target.ofmt == .elf) table: { + var prong_items: u32 = 0; + var min: ?Value = null; + var max: ?Value = null; + { + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + prong_items += @intCast(case.items.len + case.ranges.len); + for (case.items) |item| { + const val = Value.fromInterned(item.toInterned().?); + if (min == null or val.compareHetero(.lt, min.?, zcu)) min = val; + if (max == null or val.compareHetero(.gt, max.?, zcu)) max = val; + } + for (case.ranges) |range| { + const low = Value.fromInterned(range[0].toInterned().?); + if (min == null or low.compareHetero(.lt, min.?, zcu)) min = low; + const high = Value.fromInterned(range[1].toInterned().?); + if (max == null or high.compareHetero(.gt, max.?, zcu)) max = high; + } + } + } + // This condition also triggers for switches with no non-else prongs and switches on bool. 
+ if (prong_items < 1 << 2 or prong_items > 1 << 8) break :table;
+
+ var min_space: Value.BigIntSpace = undefined;
+ const min_bigint = min.?.toBigInt(&min_space, zcu);
+ var max_space: Value.BigIntSpace = undefined;
+ const max_bigint = max.?.toBigInt(&max_space, zcu);
+ const limbs = try allocator.alloc(
+ std.math.big.Limb,
+ @max(min_bigint.limbs.len, max_bigint.limbs.len) + 1,
+ );
+ defer allocator.free(limbs);
+ const table_len = table_len: {
+ var table_len_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
+ table_len_bigint.sub(max_bigint, min_bigint);
+ assert(table_len_bigint.positive); // min <= max
+ break :table_len @as(u11, table_len_bigint.toConst().to(u10) catch break :table) + 1; // no more than a 1024 entry table
+ };
+ assert(prong_items <= table_len); // each prong item introduces at least one unique integer to the range
+ if (prong_items < table_len >> 2) break :table; // no more than 75% waste
+
+ const condition_index = if (condition_dies and condition.isModifiable()) condition else condition_index: {
+ const condition_index = try self.allocTempRegOrMem(condition_ty, true);
+ try self.genCopy(condition_ty, condition_index, condition, .{});
+ break :condition_index condition_index;
+ };
+ try self.spillEflagsIfOccupied();
+ if (min.?.orderAgainstZero(zcu).compare(.neq)) try self.genBinOpMir(
+ .{ ._, .sub },
+ condition_ty,
+ condition_index,
+ .{ .air_ref = Air.internedToRef(min.?.toIntern()) },
+ );
+ const else_reloc = if (switch_br.else_body_len > 0) else_reloc: {
+ try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table_len - 1 });
+ break :else_reloc try self.asmJccReloc(.a, undefined);
+ } else undefined;
+ const table_start: u31 = @intCast(self.mir_table.items.len);
+ {
+ const condition_index_reg = if (condition_index.isRegister())
+ condition_index.getReg().?
+ else
+ try self.copyToTmpRegister(.usize, condition_index);
+ const condition_index_lock = self.register_manager.lockReg(condition_index_reg);
+ defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock);
+ try self.truncateRegister(condition_ty, condition_index_reg);
+ const ptr_size = @divExact(self.target.ptrBitWidth(), 8);
+ try self.asmMemory(.{ ._, .jmp }, .{
+ .base = .table,
+ .mod = .{ .rm = .{
+ .size = .ptr,
+ .index = registerAlias(condition_index_reg, ptr_size),
+ .scale = .fromFactor(@intCast(ptr_size)),
+ .disp = table_start * ptr_size,
+ } },
+ });
+ }
+ const else_reloc_marker: u32 = 0;
+ assert(self.mir_instructions.len > else_reloc_marker);
+ try self.mir_table.appendNTimes(self.gpa, else_reloc_marker, table_len);
+ if (is_loop) try self.loop_switches.putNoClobber(self.gpa, inst, .{
+ .start = table_start,
+ .len = table_len,
+ .min = min.?,
+ .else_relocs = if (switch_br.else_body_len > 0) .{ .forward = .empty } else .@"unreachable",
+ });
+ defer if (is_loop) {
+ var loop_switch_data = self.loop_switches.fetchRemove(inst).?.value;
+ switch (loop_switch_data.else_relocs) {
+ .@"unreachable", .backward => {},
+ .forward => |*else_relocs| else_relocs.deinit(self.gpa),
+ }
+ };
+ var cases_it = switch_br.iterateCases();
+ while (cases_it.next()) |case| {
+ {
+ const table = self.mir_table.items[table_start..][0..table_len];
+ for (case.items) |item| {
+ const val = Value.fromInterned(item.toInterned().?);
+ var val_space: Value.BigIntSpace = undefined;
+ const val_bigint = val.toBigInt(&val_space, zcu);
+ var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
+ index_bigint.sub(val_bigint, min_bigint);
+ table[index_bigint.toConst().to(u10) catch unreachable] = @intCast(self.mir_instructions.len);
+ }
+ for (case.ranges) |range| {
+ var low_space: Value.BigIntSpace = undefined;
+ const low_bigint = Value.fromInterned(range[0].toInterned().?).toBigInt(&low_space, zcu);
+ var high_space: Value.BigIntSpace = undefined;
+ const high_bigint = Value.fromInterned(range[1].toInterned().?).toBigInt(&high_space, zcu);
+ var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined };
+ index_bigint.sub(low_bigint, min_bigint);
+ const start = index_bigint.toConst().to(u10) catch unreachable;
+ index_bigint.sub(high_bigint, min_bigint);
+ const end = @as(u11, index_bigint.toConst().to(u10) catch unreachable) + 1;
+ @memset(table[start..end], @intCast(self.mir_instructions.len));
+ }
+ }
+
+ for (liveness.deaths[case.idx]) |operand| try self.processDeath(operand);
+
+ try self.genBodyBlock(case.body);
+ try self.restoreState(state, &.{}, .{
+ .emit_instructions = false,
+ .update_tracking = true,
+ .resurrect = true,
+ .close_scope = true,
+ });
+ }
+ if (switch_br.else_body_len > 0) {
+ const else_body = cases_it.elseBody();
+
+ const else_deaths = liveness.deaths.len - 1;
+ for (liveness.deaths[else_deaths]) |operand| try self.processDeath(operand);
+
+ self.performReloc(else_reloc);
+ if (is_loop) {
+ const loop_switch_data = self.loop_switches.getPtr(inst).?;
+ for (loop_switch_data.else_relocs.forward.items) |reloc| self.performReloc(reloc);
+ loop_switch_data.else_relocs.forward.deinit(self.gpa);
+ loop_switch_data.else_relocs = .{ .backward = @intCast(self.mir_instructions.len) };
+ }
+ for (self.mir_table.items[table_start..][0..table_len]) |*entry| if (entry.* == else_reloc_marker) {
+ entry.* = @intCast(self.mir_instructions.len);
+ };
+
+ try
self.genBodyBlock(else_body); + try self.restoreState(state, &.{}, .{ + .emit_instructions = false, + .update_tracking = true, + .resurrect = true, + .close_scope = true, + }); + } + return; + } + + const signedness = if (condition_ty.isAbiInt(zcu)) condition_ty.intInfo(zcu).signedness else .unsigned; + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + var relocs = try allocator.alloc(Mir.Inst.Index, case.items.len + case.ranges.len); + defer allocator.free(relocs); try self.spillEflagsIfOccupied(); for (case.items, relocs[0..case.items.len]) |item, *reloc| { @@ -20849,9 +21034,8 @@ fn lowerSwitchBr(self: *CodeGen, inst: Air.Inst.Index, switch_br: Air.UnwrappedS // Relocate the "skip" branch to fall through to the next case. self.performReloc(skip_case_reloc); } - if (switch_br.else_body_len > 0) { - const else_body = it.elseBody(); + const else_body = cases_it.elseBody(); const else_deaths = liveness.deaths.len - 1; for (liveness.deaths[else_deaths]) |operand| try self.processDeath(operand); @@ -20873,11 +21057,11 @@ fn airSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void { // If the condition dies here in this switch instruction, process // that death now instead of later as this has an effect on // whether it needs to be spilled in the branches - if (self.liveness.operandDies(inst, 0)) { + const condition_dies = self.liveness.operandDies(inst, 0); + if (condition_dies) { if (switch_br.operand.toIndex()) |op_inst| try self.processDeath(op_inst); } - - try self.lowerSwitchBr(inst, switch_br, condition); + try self.lowerSwitchBr(inst, switch_br, condition, condition_dies, false); // We already took care of pl_op.operand earlier, so there's nothing left to do } @@ -20915,7 +21099,7 @@ fn airLoopSwitchBr(self: *CodeGen, inst: Air.Inst.Index) !void { // Stop tracking block result without forgetting tracking info try self.freeValue(mat_cond); - try self.lowerSwitchBr(inst, switch_br, mat_cond); + try self.lowerSwitchBr(inst, switch_br, mat_cond, true, true); try self.processDeath(inst); } @@ -20924,8 +21108,67 @@ fn airSwitchDispatch(self: *CodeGen, inst: Air.Inst.Index) !void { const br = self.air.instructions.items(.data)[@intFromEnum(inst)].br; const block_ty = self.typeOfIndex(br.block_inst); - const block_tracking = self.inst_tracking.getPtr(br.block_inst).?; const loop_data = self.loops.getPtr(br.block_inst).?; + if (self.loop_switches.getPtr(br.block_inst)) |table| { + // Process operand death so that it is properly accounted for in the State below. 
+ const condition_dies = self.liveness.operandDies(inst, 0); + + try self.restoreState(loop_data.state, &.{}, .{ + .emit_instructions = true, + .update_tracking = false, + .resurrect = false, + .close_scope = false, + }); + + const condition_ty = self.typeOf(br.operand); + const condition = try self.resolveInst(br.operand); + const condition_index = if (condition_dies and condition.isModifiable()) condition else condition_index: { + const condition_index = try self.allocTempRegOrMem(condition_ty, true); + try self.genCopy(condition_ty, condition_index, condition, .{}); + break :condition_index condition_index; + }; + try self.spillEflagsIfOccupied(); + if (table.min.orderAgainstZero(self.pt.zcu).compare(.neq)) try self.genBinOpMir( + .{ ._, .sub }, + condition_ty, + condition_index, + .{ .air_ref = Air.internedToRef(table.min.toIntern()) }, + ); + switch (table.else_relocs) { + .@"unreachable" => {}, + .forward => |*else_relocs| { + try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table.len - 1 }); + try else_relocs.append(self.gpa, try self.asmJccReloc(.a, undefined)); + }, + .backward => |else_reloc| { + try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition_index, .{ .immediate = table.len - 1 }); + _ = try self.asmJccReloc(.a, else_reloc); + }, + } + { + const condition_index_reg = if (condition_index.isRegister()) + condition_index.getReg().? + else + try self.copyToTmpRegister(.usize, condition_index); + const condition_index_lock = self.register_manager.lockReg(condition_index_reg); + defer if (condition_index_lock) |lock| self.register_manager.unlockReg(lock); + try self.truncateRegister(condition_ty, condition_index_reg); + const ptr_size = @divExact(self.target.ptrBitWidth(), 8); + try self.asmMemory(.{ ._, .jmp }, .{ + .base = .table, + .mod = .{ .rm = .{ + .size = .ptr, + .index = registerAlias(condition_index_reg, ptr_size), + .scale = .fromFactor(@intCast(ptr_size)), + .disp = @intCast(table.start * ptr_size), + } }, + }); + } + + return self.finishAir(inst, .none, .{ br.operand, .none, .none }); + } + + const block_tracking = self.inst_tracking.getPtr(br.block_inst).?; done: { try self.getValue(block_tracking.short, null); const src_mcv = try self.resolveInst(br.operand); @@ -22543,6 +22786,7 @@ fn genSetMem( .none => .{ .immediate = @bitCast(@as(i64, disp)) }, .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } }, .frame => |base_frame_index| .{ .lea_frame = .{ .index = base_frame_index, .off = disp } }, + .table => unreachable, .reloc => |sym_index| .{ .lea_symbol = .{ .sym_index = sym_index, .off = disp } }, }; switch (src_mcv) { @@ -22652,6 +22896,7 @@ fn genSetMem( .index = frame_index, .off = disp, }).compare(.gte, src_align), + .table => unreachable, .reloc => false, })).write( self, @@ -23260,6 +23505,7 @@ fn airCmpxchg(self: *CodeGen, inst: Air.Inst.Index) !void { const ptr_lock = switch (ptr_mem.base) { .none, .frame, .reloc => null, .reg => |reg| self.register_manager.lockReg(reg), + .table => unreachable, }; defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); @@ -23327,6 +23573,7 @@ fn atomicOp( const mem_lock = switch (ptr_mem.base) { .none, .frame, .reloc => null, .reg => |reg| self.register_manager.lockReg(reg), + .table => unreachable, }; defer if (mem_lock) |lock| self.register_manager.unlockReg(lock); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 6e0d75f883..bd5efec81c 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -10,22 +10,21 @@ 
prev_di_loc: Loc, /// Relative to the beginning of `code`. prev_di_pc: usize, -code_offset_mapping: std.AutoHashMapUnmanaged(Mir.Inst.Index, usize) = .empty, -relocs: std.ArrayListUnmanaged(Reloc) = .empty, - pub const Error = Lower.Error || error{ EmitFail, } || link.File.UpdateDebugInfoError; pub fn emitMir(emit: *Emit) Error!void { const gpa = emit.lower.bin_file.comp.gpa; + const code_offset_mapping = try emit.lower.allocator.alloc(u32, emit.lower.mir.instructions.len); + defer emit.lower.allocator.free(code_offset_mapping); + var relocs: std.ArrayListUnmanaged(Reloc) = .empty; + defer relocs.deinit(emit.lower.allocator); + var table_relocs: std.ArrayListUnmanaged(TableReloc) = .empty; + defer table_relocs.deinit(emit.lower.allocator); for (0..emit.lower.mir.instructions.len) |mir_i| { const mir_index: Mir.Inst.Index = @intCast(mir_i); - try emit.code_offset_mapping.putNoClobber( - emit.lower.allocator, - mir_index, - @intCast(emit.code.items.len), - ); + code_offset_mapping[mir_index] = @intCast(emit.code.items.len); const lowered = try emit.lower.lowerMir(mir_index); var lowered_relocs = lowered.relocs; for (lowered.insts, 0..) |lowered_inst, lowered_index| { @@ -89,13 +88,17 @@ pub fn emitMir(emit: *Emit) Error!void { lowered_relocs[0].lowered_inst_index == lowered_index) : ({ lowered_relocs = lowered_relocs[1..]; }) switch (lowered_relocs[0].target) { - .inst => |target| try emit.relocs.append(emit.lower.allocator, .{ + .inst => |target| try relocs.append(emit.lower.allocator, .{ .source = start_offset, .source_offset = end_offset - 4, .target = target, .target_offset = lowered_relocs[0].off, .length = @intCast(end_offset - start_offset), }), + .table => try table_relocs.append(emit.lower.allocator, .{ + .source_offset = end_offset - 4, + .target_offset = lowered_relocs[0].off, + }), .linker_extern_fn => |sym_index| if (emit.lower.bin_file.cast(.elf)) |elf_file| { // Add relocation to the decl. 
const zo = elf_file.zigObjectPtr().?; @@ -103,7 +106,7 @@ pub fn emitMir(emit: *Emit) Error!void { const r_type = @intFromEnum(std.elf.R_X86_64.PLT32); try atom_ptr.addReloc(gpa, .{ .r_offset = end_offset - 4, - .r_info = (@as(u64, @intCast(sym_index)) << 32) | r_type, + .r_info = @as(u64, sym_index) << 32 | r_type, .r_addend = lowered_relocs[0].off - 4, }, zo); } else if (emit.lower.bin_file.cast(.macho)) |macho_file| { @@ -150,7 +153,7 @@ pub fn emitMir(emit: *Emit) Error!void { const r_type = @intFromEnum(std.elf.R_X86_64.TLSLD); try atom.addReloc(gpa, .{ .r_offset = end_offset - 4, - .r_info = (@as(u64, @intCast(sym_index)) << 32) | r_type, + .r_info = @as(u64, sym_index) << 32 | r_type, .r_addend = lowered_relocs[0].off - 4, }, zo); }, @@ -161,7 +164,7 @@ pub fn emitMir(emit: *Emit) Error!void { const r_type = @intFromEnum(std.elf.R_X86_64.DTPOFF32); try atom.addReloc(gpa, .{ .r_offset = end_offset - 4, - .r_info = (@as(u64, @intCast(sym_index)) << 32) | r_type, + .r_info = @as(u64, sym_index) << 32 | r_type, .r_addend = lowered_relocs[0].off, }, zo); }, @@ -176,7 +179,7 @@ pub fn emitMir(emit: *Emit) Error!void { @intFromEnum(std.elf.R_X86_64.PC32); try atom.addReloc(gpa, .{ .r_offset = end_offset - 4, - .r_info = (@as(u64, @intCast(sym_index)) << 32) | r_type, + .r_info = @as(u64, sym_index) << 32 | r_type, .r_addend = lowered_relocs[0].off - 4, }, zo); } else { @@ -186,7 +189,7 @@ pub fn emitMir(emit: *Emit) Error!void { @intFromEnum(std.elf.R_X86_64.@"32"); try atom.addReloc(gpa, .{ .r_offset = end_offset - 4, - .r_info = (@as(u64, @intCast(sym_index)) << 32) | r_type, + .r_info = @as(u64, sym_index) << 32 | r_type, .r_addend = lowered_relocs[0].off, }, zo); } @@ -412,7 +415,7 @@ pub fn emitMir(emit: *Emit) Error!void { loc_buf[0] = switch (mem.base()) { .none => .{ .constu = 0 }, .reg => |reg| .{ .breg = reg.dwarfNum() }, - .frame => unreachable, + .frame, .table => unreachable, .reloc => |sym_index| .{ .addr = .{ .sym = sym_index } }, }; break :base &loc_buf[0]; @@ -463,13 +466,40 @@ pub fn emitMir(emit: *Emit) Error!void { } } } - try emit.fixupRelocs(); -} + { + // TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size. + // This should be reversed like it is done in aarch64 MIR emit code: start with the smallest + // possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution + // until the entire decl is correctly emitted with all JMP/CALL instructions within range. 
+ for (relocs.items) |reloc| {
+ const target = code_offset_mapping[reloc.target];
+ const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.source + reloc.length)) + reloc.target_offset;
+ std.mem.writeInt(i32, emit.code.items[reloc.source_offset..][0..4], @intCast(disp), .little);
+ }
+ }
+ if (emit.lower.mir.table.len > 0) {
+ if (emit.lower.bin_file.cast(.elf)) |elf_file| {
+ const zo = elf_file.zigObjectPtr().?;
+ const atom = zo.symbol(emit.atom_index).atom(elf_file).?;
-pub fn deinit(emit: *Emit) void {
- emit.relocs.deinit(emit.lower.allocator);
- emit.code_offset_mapping.deinit(emit.lower.allocator);
- emit.* = undefined;
+ const ptr_size = @divExact(emit.lower.target.ptrBitWidth(), 8);
+ var table_offset = std.mem.alignForward(u32, @intCast(emit.code.items.len), ptr_size);
+ for (table_relocs.items) |table_reloc| try atom.addReloc(gpa, .{
+ .r_offset = table_reloc.source_offset,
+ .r_info = @as(u64, emit.atom_index) << 32 | @intFromEnum(std.elf.R_X86_64.@"32"),
+ .r_addend = @as(i64, table_offset) + table_reloc.target_offset,
+ }, zo);
+ for (emit.lower.mir.table) |entry| {
+ try atom.addReloc(gpa, .{
+ .r_offset = table_offset,
+ .r_info = @as(u64, emit.atom_index) << 32 | @intFromEnum(std.elf.R_X86_64.@"64"),
+ .r_addend = code_offset_mapping[entry],
+ }, zo);
+ table_offset += ptr_size;
+ }
+ try emit.code.appendNTimes(gpa, 0, table_offset - emit.code.items.len);
+ } else unreachable;
+ }
 }
 fn fail(emit: *Emit, comptime format: []const u8, args: anytype) Error {
@@ -481,7 +511,7 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) Error {
 const Reloc = struct {
 /// Offset of the instruction.
- source: usize,
+ source: u32,
 /// Offset of the relocation within the instruction.
 source_offset: u32,
 /// Target of the relocation.
@@ -492,18 +522,12 @@ const Reloc = struct {
 length: u5,
 };
-fn fixupRelocs(emit: *Emit) Error!void {
- // TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size.
- // This should be reversed like it is done in aarch64 MIR emit code: start with the smallest
- // possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution
- // until the entire decl is correctly emitted with all JMP/CALL instructions within range.
- for (emit.relocs.items) |reloc| {
- const target = emit.code_offset_mapping.get(reloc.target) orelse
- return emit.fail("JMP/CALL relocation target not found!", .{});
- const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.source + reloc.length)) + reloc.target_offset;
- std.mem.writeInt(i32, emit.code.items[reloc.source_offset..][0..4], @intCast(disp), .little);
- }
-}
+const TableReloc = struct {
+ /// Offset of the relocation.
+ source_offset: u32,
+ /// Offset from the start of the table.
+ target_offset: i32,
+};
 const Loc = struct {
 line: u32,
diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig
index bfe699a825..55582100ea 100644
--- a/src/arch/x86_64/Lower.zig
+++ b/src/arch/x86_64/Lower.zig
@@ -57,6 +57,7 @@ pub const Reloc = struct {
 const Target = union(enum) {
 inst: Mir.Inst.Index,
+ table,
 linker_reloc: u32,
 linker_tlsld: u32,
 linker_dtpoff: u32,
@@ -348,7 +349,7 @@ pub fn fail(lower: *Lower, comptime format: []const u8, args: anytype) Error {
 return error.LowerFail;
 }
-pub fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
+pub fn imm(lower: *const Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
 return switch (ops) {
 .rri_s,
 .ri_s,
@@ -379,8 +380,16 @@ pub fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
 };
 }
-pub fn mem(lower: Lower, payload: u32) Memory {
- return lower.mir.resolveFrameLoc(lower.mir.extraData(Mir.Memory, payload).data).decode();
+pub fn mem(lower: *Lower, payload: u32) Memory {
+ var m = lower.mir.resolveFrameLoc(lower.mir.extraData(Mir.Memory, payload).data).decode();
+ switch (m) {
+ .sib => |*sib| switch (sib.base) {
+ else => {},
+ .table => sib.disp = lower.reloc(.table, sib.disp).signed,
+ },
+ else => {},
+ }
+ return m;
 }
 fn reloc(lower: *Lower, target: Reloc.Target, off: i32) Immediate {
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index c5f29d3a0c..595f79e8dd 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -9,6 +9,7 @@ instructions: std.MultiArrayList(Inst).Slice,
 /// The meaning of this data is determined by `Inst.Tag` value.
 extra: []const u32,
+table: []const Inst.Index,
 frame_locs: std.MultiArrayList(FrameLoc).Slice,
 pub const Inst = struct {
@@ -1237,7 +1238,7 @@ pub const Memory = struct {
 size: bits.Memory.Size,
 index: Register,
 scale: bits.Memory.Scale,
- _: u16 = undefined,
+ _: u15 = undefined,
 };
 pub fn encode(mem: bits.Memory) Memory {
@@ -1260,7 +1261,7 @@ pub const Memory = struct {
 },
 },
 .base = switch (mem.base) {
- .none => undefined,
+ .none, .table => undefined,
 .reg => |reg| @intFromEnum(reg),
 .frame => |frame_index| @intFromEnum(frame_index),
 .reloc => |sym_index| sym_index,
@@ -1289,6 +1290,7 @@ pub const Memory = struct {
 .none => .none,
 .reg => .{ .reg = @enumFromInt(mem.base) },
 .frame => .{ .frame = @enumFromInt(mem.base) },
+ .table => .table,
 .reloc => .{ .reloc = mem.base },
 },
 .scale_index = switch (mem.info.index) {
@@ -1317,6 +1319,7 @@ pub const Memory = struct {
 pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void {
 mir.instructions.deinit(gpa);
 gpa.free(mir.extra);
+ gpa.free(mir.table);
 mir.frame_locs.deinit(gpa);
 mir.* = undefined;
 }
@@ -1352,7 +1355,7 @@ pub fn resolveFrameAddr(mir: Mir, frame_addr: bits.FrameAddr) bits.RegisterOffse
 pub fn resolveFrameLoc(mir: Mir, mem: Memory) Memory {
 return switch (mem.info.base) {
- .none, .reg, .reloc => mem,
+ .none, .reg, .table, .reloc => mem,
 .frame => if (mir.frame_locs.len > 0) .{
 .info = .{
 .base = .reg,
diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig
index 500dc488e6..8f13620730 100644
--- a/src/arch/x86_64/bits.zig
+++ b/src/arch/x86_64/bits.zig
@@ -482,17 +482,18 @@ pub const Memory = struct {
 base: Base = .none,
 mod: Mod = .{ .rm = .{} },
- pub const Base = union(enum(u2)) {
+ pub const Base = union(enum(u3)) {
 none,
 reg: Register,
 frame: FrameIndex,
+ table,
 reloc: u32,
 pub const Tag = @typeInfo(Base).@"union".tag_type.?;
 pub fn isExtended(self: Base) bool {
 return switch (self) {
- .none, .frame, .reloc => false, // rsp, rbp, and rip are not extended
+
.none, .frame, .table, .reloc => false, // rsp, rbp, and rip are not extended .reg => |reg| reg.isExtended(), }; } diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 048fb6508d..bf0c0c0467 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -138,7 +138,7 @@ pub const Instruction = struct { .moffs => true, .rip => false, .sib => |s| switch (s.base) { - .none, .frame, .reloc => false, + .none, .frame, .table, .reloc => false, .reg => |reg| reg.class() == .segment, }, }; @@ -161,9 +161,9 @@ pub const Instruction = struct { pub fn disp(mem: Memory) Immediate { return switch (mem) { - .sib => |s| Immediate.s(s.disp), - .rip => |r| Immediate.s(r.disp), - .moffs => |m| Immediate.u(m.offset), + .sib => |s| .s(s.disp), + .rip => |r| .s(r.disp), + .moffs => |m| .u(m.offset), }; } @@ -277,6 +277,7 @@ pub const Instruction = struct { .none => any = false, .reg => |reg| try writer.print("{s}", .{@tagName(reg)}), .frame => |frame_index| try writer.print("{}", .{frame_index}), + .table => try writer.print("Table", .{}), .reloc => |sym_index| try writer.print("Symbol({d})", .{sym_index}), } if (mem.scaleIndex()) |si| { @@ -614,7 +615,7 @@ pub const Instruction = struct { switch (mem) { .moffs => unreachable, .sib => |sib| switch (sib.base) { - .none => { + .none, .table => { try encoder.modRm_SIBDisp0(operand_enc); if (mem.scaleIndex()) |si| { const scale = math.log2_int(u4, si.scale); @@ -1191,7 +1192,7 @@ const TestEncode = struct { ) !void { var stream = std.io.fixedBufferStream(&enc.buffer); var count_writer = std.io.countingWriter(stream.writer()); - const inst = try Instruction.new(.none, mnemonic, ops); + const inst: Instruction = try .new(.none, mnemonic, ops); try inst.encode(count_writer.writer(), .{}); enc.index = count_writer.bytes_written; } @@ -1205,9 +1206,9 @@ test "encode" { var buf = std.ArrayList(u8).init(testing.allocator); defer buf.deinit(); - const inst = try Instruction.new(.none, .mov, &.{ + const inst: Instruction = try .new(.none, .mov, &.{ .{ .reg = .rbx }, - .{ .imm = Instruction.Immediate.u(4) }, + .{ .imm = .u(4) }, }); try inst.encode(buf.writer(), .{}); try testing.expectEqualSlices(u8, &.{ 0x48, 0xc7, 0xc3, 0x4, 0x0, 0x0, 0x0 }, buf.items); @@ -1217,47 +1218,47 @@ test "lower I encoding" { var enc = TestEncode{}; try enc.encode(.push, &.{ - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x6A\x10", enc.code(), "push 0x10"); try enc.encode(.push, &.{ - .{ .imm = Instruction.Immediate.u(0x1000) }, + .{ .imm = .u(0x1000) }, }); try expectEqualHexStrings("\x66\x68\x00\x10", enc.code(), "push 0x1000"); try enc.encode(.push, &.{ - .{ .imm = Instruction.Immediate.u(0x10000000) }, + .{ .imm = .u(0x10000000) }, }); try expectEqualHexStrings("\x68\x00\x00\x00\x10", enc.code(), "push 0x10000000"); try enc.encode(.adc, &.{ .{ .reg = .rax }, - .{ .imm = Instruction.Immediate.u(0x10000000) }, + .{ .imm = .u(0x10000000) }, }); try expectEqualHexStrings("\x48\x15\x00\x00\x00\x10", enc.code(), "adc rax, 0x10000000"); try enc.encode(.add, &.{ .{ .reg = .al }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x04\x10", enc.code(), "add al, 0x10"); try enc.encode(.add, &.{ .{ .reg = .rax }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x48\x83\xC0\x10", enc.code(), "add rax, 0x10"); try enc.encode(.sbb, &.{ .{ .reg = .ax }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = 
.u(0x10) }, }); try expectEqualHexStrings("\x66\x1D\x10\x00", enc.code(), "sbb ax, 0x10"); try enc.encode(.xor, &.{ .{ .reg = .al }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x34\x10", enc.code(), "xor al, 0x10"); } @@ -1267,43 +1268,43 @@ test "lower MI encoding" { try enc.encode(.mov, &.{ .{ .reg = .r12 }, - .{ .imm = Instruction.Immediate.u(0x1000) }, + .{ .imm = .u(0x1000) }, }); try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", enc.code(), "mov r12, 0x1000"); try enc.encode(.mov, &.{ .{ .mem = Instruction.Memory.initSib(.byte, .{ .base = .{ .reg = .r12 } }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x41\xC6\x04\x24\x10", enc.code(), "mov BYTE PTR [r12], 0x10"); try enc.encode(.mov, &.{ .{ .reg = .r12 }, - .{ .imm = Instruction.Immediate.u(0x1000) }, + .{ .imm = .u(0x1000) }, }); try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", enc.code(), "mov r12, 0x1000"); try enc.encode(.mov, &.{ .{ .reg = .r12 }, - .{ .imm = Instruction.Immediate.u(0x1000) }, + .{ .imm = .u(0x1000) }, }); try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", enc.code(), "mov r12, 0x1000"); try enc.encode(.mov, &.{ .{ .reg = .rax }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x48\xc7\xc0\x10\x00\x00\x00", enc.code(), "mov rax, 0x10"); try enc.encode(.mov, &.{ .{ .mem = Instruction.Memory.initSib(.dword, .{ .base = .{ .reg = .r11 } }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x41\xc7\x03\x10\x00\x00\x00", enc.code(), "mov DWORD PTR [r11], 0x10"); try enc.encode(.mov, &.{ .{ .mem = Instruction.Memory.initRip(.qword, 0x10) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings( "\x48\xC7\x05\x10\x00\x00\x00\x10\x00\x00\x00", @@ -1313,19 +1314,19 @@ test "lower MI encoding" { try enc.encode(.mov, &.{ .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -8 }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x48\xc7\x45\xf8\x10\x00\x00\x00", enc.code(), "mov QWORD PTR [rbp - 8], 0x10"); try enc.encode(.mov, &.{ .{ .mem = Instruction.Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -2 }) }, - .{ .imm = Instruction.Immediate.s(-16) }, + .{ .imm = .s(-16) }, }); try expectEqualHexStrings("\x66\xC7\x45\xFE\xF0\xFF", enc.code(), "mov WORD PTR [rbp - 2], -16"); try enc.encode(.mov, &.{ .{ .mem = Instruction.Memory.initSib(.byte, .{ .base = .{ .reg = .rbp }, .disp = -1 }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\xC6\x45\xFF\x10", enc.code(), "mov BYTE PTR [rbp - 1], 0x10"); @@ -1335,7 +1336,7 @@ test "lower MI encoding" { .disp = 0x10000000, .scale_index = .{ .scale = 2, .index = .rcx }, }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings( "\x48\xC7\x04\x4D\x00\x00\x00\x10\x10\x00\x00\x00", @@ -1345,43 +1346,43 @@ test "lower MI encoding" { try enc.encode(.adc, &.{ .{ .mem = Instruction.Memory.initSib(.byte, .{ .base = .{ .reg = .rbp }, .disp = -0x10 }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x80\x55\xF0\x10", enc.code(), "adc BYTE PTR [rbp - 0x10], 0x10"); try enc.encode(.adc, &.{ .{ .mem = Instruction.Memory.initRip(.qword, 0) }, - .{ .imm = 
Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x48\x83\x15\x00\x00\x00\x00\x10", enc.code(), "adc QWORD PTR [rip], 0x10"); try enc.encode(.adc, &.{ .{ .reg = .rax }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x48\x83\xD0\x10", enc.code(), "adc rax, 0x10"); try enc.encode(.add, &.{ .{ .mem = Instruction.Memory.initSib(.dword, .{ .base = .{ .reg = .rdx }, .disp = -8 }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x83\x42\xF8\x10", enc.code(), "add DWORD PTR [rdx - 8], 0x10"); try enc.encode(.add, &.{ .{ .reg = .rax }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x48\x83\xC0\x10", enc.code(), "add rax, 0x10"); try enc.encode(.add, &.{ .{ .mem = Instruction.Memory.initSib(.qword, .{ .base = .{ .reg = .rbp }, .disp = -0x10 }) }, - .{ .imm = Instruction.Immediate.s(-0x10) }, + .{ .imm = .s(-0x10) }, }); try expectEqualHexStrings("\x48\x83\x45\xF0\xF0", enc.code(), "add QWORD PTR [rbp - 0x10], -0x10"); try enc.encode(.@"and", &.{ .{ .mem = Instruction.Memory.initSib(.dword, .{ .base = .{ .reg = .ds }, .disp = 0x10000000 }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings( "\x83\x24\x25\x00\x00\x00\x10\x10", @@ -1391,7 +1392,7 @@ test "lower MI encoding" { try enc.encode(.@"and", &.{ .{ .mem = Instruction.Memory.initSib(.dword, .{ .base = .{ .reg = .es }, .disp = 0x10000000 }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings( "\x26\x83\x24\x25\x00\x00\x00\x10\x10", @@ -1401,7 +1402,7 @@ test "lower MI encoding" { try enc.encode(.@"and", &.{ .{ .mem = Instruction.Memory.initSib(.dword, .{ .base = .{ .reg = .r12 }, .disp = 0x10000000 }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings( "\x41\x83\xA4\x24\x00\x00\x00\x10\x10", @@ -1411,7 +1412,7 @@ test "lower MI encoding" { try enc.encode(.sub, &.{ .{ .mem = Instruction.Memory.initSib(.dword, .{ .base = .{ .reg = .r11 }, .disp = 0x10000000 }) }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings( "\x41\x83\xAB\x00\x00\x00\x10\x10", @@ -1630,14 +1631,14 @@ test "lower RMI encoding" { try enc.encode(.imul, &.{ .{ .reg = .r11 }, .{ .reg = .r12 }, - .{ .imm = Instruction.Immediate.s(-2) }, + .{ .imm = .s(-2) }, }); try expectEqualHexStrings("\x4D\x6B\xDC\xFE", enc.code(), "imul r11, r12, -2"); try enc.encode(.imul, &.{ .{ .reg = .r11 }, .{ .mem = Instruction.Memory.initRip(.qword, -16) }, - .{ .imm = Instruction.Immediate.s(-1024) }, + .{ .imm = .s(-1024) }, }); try expectEqualHexStrings( "\x4C\x69\x1D\xF0\xFF\xFF\xFF\x00\xFC\xFF\xFF", @@ -1648,7 +1649,7 @@ test "lower RMI encoding" { try enc.encode(.imul, &.{ .{ .reg = .bx }, .{ .mem = Instruction.Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -16 }) }, - .{ .imm = Instruction.Immediate.s(-1024) }, + .{ .imm = .s(-1024) }, }); try expectEqualHexStrings( "\x66\x69\x5D\xF0\x00\xFC", @@ -1659,7 +1660,7 @@ test "lower RMI encoding" { try enc.encode(.imul, &.{ .{ .reg = .bx }, .{ .mem = Instruction.Memory.initSib(.word, .{ .base = .{ .reg = .rbp }, .disp = -16 }) }, - .{ .imm = Instruction.Immediate.u(1024) }, + .{ .imm = .u(1024) }, }); try expectEqualHexStrings( "\x66\x69\x5D\xF0\x00\x04", @@ -1775,7 +1776,7 @@ test "lower M encoding" { try 
expectEqualHexStrings("\x65\xFF\x14\x25\x00\x00\x00\x00", enc.code(), "call gs:0x0"); try enc.encode(.call, &.{ - .{ .imm = Instruction.Immediate.s(0) }, + .{ .imm = .s(0) }, }); try expectEqualHexStrings("\xE8\x00\x00\x00\x00", enc.code(), "call 0x0"); @@ -1834,7 +1835,7 @@ test "lower OI encoding" { try enc.encode(.mov, &.{ .{ .reg = .rax }, - .{ .imm = Instruction.Immediate.u(0x1000000000000000) }, + .{ .imm = .u(0x1000000000000000) }, }); try expectEqualHexStrings( "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x10", @@ -1844,7 +1845,7 @@ test "lower OI encoding" { try enc.encode(.mov, &.{ .{ .reg = .r11 }, - .{ .imm = Instruction.Immediate.u(0x1000000000000000) }, + .{ .imm = .u(0x1000000000000000) }, }); try expectEqualHexStrings( "\x49\xBB\x00\x00\x00\x00\x00\x00\x00\x10", @@ -1854,19 +1855,19 @@ test "lower OI encoding" { try enc.encode(.mov, &.{ .{ .reg = .r11d }, - .{ .imm = Instruction.Immediate.u(0x10000000) }, + .{ .imm = .u(0x10000000) }, }); try expectEqualHexStrings("\x41\xBB\x00\x00\x00\x10", enc.code(), "mov r11d, 0x10000000"); try enc.encode(.mov, &.{ .{ .reg = .r11w }, - .{ .imm = Instruction.Immediate.u(0x1000) }, + .{ .imm = .u(0x1000) }, }); try expectEqualHexStrings("\x66\x41\xBB\x00\x10", enc.code(), "mov r11w, 0x1000"); try enc.encode(.mov, &.{ .{ .reg = .r11b }, - .{ .imm = Instruction.Immediate.u(0x10) }, + .{ .imm = .u(0x10) }, }); try expectEqualHexStrings("\x41\xB3\x10", enc.code(), "mov r11b, 0x10"); } @@ -1940,7 +1941,7 @@ test "lower NP encoding" { } fn invalidInstruction(mnemonic: Instruction.Mnemonic, ops: []const Instruction.Operand) !void { - const err = Instruction.new(.none, mnemonic, ops); + const err: Instruction = .new(.none, mnemonic, ops); try testing.expectError(error.InvalidInstruction, err); } @@ -1988,12 +1989,12 @@ test "invalid instruction" { .{ .reg = .r12d }, }); try invalidInstruction(.push, &.{ - .{ .imm = Instruction.Immediate.u(0x1000000000000000) }, + .{ .imm = .u(0x1000000000000000) }, }); } fn cannotEncode(mnemonic: Instruction.Mnemonic, ops: []const Instruction.Operand) !void { - try testing.expectError(error.CannotEncode, Instruction.new(.none, mnemonic, ops)); + try testing.expectError(error.CannotEncode, .new(.none, mnemonic, ops)); } test "cannot encode" { @@ -2177,7 +2178,7 @@ const Assembler = struct { pub fn assemble(as: *Assembler, writer: anytype) !void { while (try as.next()) |parsed_inst| { - const inst = try Instruction.new(.none, parsed_inst.mnemonic, &parsed_inst.ops); + const inst: Instruction = try .new(.none, parsed_inst.mnemonic, &parsed_inst.ops); try inst.encode(writer, .{}); } } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e36fd4e80a..a5d4379004 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3548,7 +3548,7 @@ pub fn getTarget(self: MachO) std.Target { pub fn invalidateKernelCache(dir: fs.Dir, sub_path: []const u8) !void { const tracy = trace(@src()); defer tracy.end(); - if (comptime builtin.target.isDarwin() and builtin.target.cpu.arch == .aarch64) { + if (builtin.target.isDarwin() and builtin.target.cpu.arch == .aarch64) { try dir.copyFile(sub_path, dir, sub_path, .{}); } } diff --git a/test/behavior/align.zig b/test/behavior/align.zig index e1b8e3a18f..e6917eb649 100644 --- a/test/behavior/align.zig +++ b/test/behavior/align.zig @@ -277,8 +277,8 @@ test "function alignment" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - // function alignment is a compile error on wasm32/wasm64 - if 
(native_arch == .wasm32 or native_arch == .wasm64) return error.SkipZigTest; + // function alignment is a compile error on wasm + if (native_arch.isWasm()) return error.SkipZigTest; const S = struct { fn alignExpr() align(@sizeOf(usize) * 2) i32 { @@ -307,8 +307,8 @@ test "implicitly decreasing fn alignment" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - // function alignment is a compile error on wasm32/wasm64 - if (native_arch == .wasm32 or native_arch == .wasm64) return error.SkipZigTest; + // function alignment is a compile error on wasm + if (native_arch.isWasm()) return error.SkipZigTest; try testImplicitlyDecreaseFnAlign(alignedSmall, 1234); try testImplicitlyDecreaseFnAlign(alignedBig, 5678); @@ -331,9 +331,9 @@ test "@alignCast functions" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - // function alignment is a compile error on wasm32/wasm64 - if (native_arch == .wasm32 or native_arch == .wasm64) return error.SkipZigTest; - if (native_arch == .thumb or native_arch == .thumbeb) return error.SkipZigTest; + // function alignment is a compile error on wasm + if (native_arch.isWasm()) return error.SkipZigTest; + if (native_arch.isThumb()) return error.SkipZigTest; try expect(fnExpectsOnly1(simple4) == 0x19); } @@ -496,9 +496,9 @@ test "align(N) on functions" { return error.SkipZigTest; } - // function alignment is a compile error on wasm32/wasm64 - if (native_arch == .wasm32 or native_arch == .wasm64) return error.SkipZigTest; - if (native_arch == .thumb or native_arch == .thumbeb) return error.SkipZigTest; + // function alignment is a compile error on wasm + if (native_arch.isWasm()) return error.SkipZigTest; + if (native_arch.isThumb()) return error.SkipZigTest; try expect((@intFromPtr(&overaligned_fn) & (0x1000 - 1)) == 0); } diff --git a/test/behavior/asm.zig b/test/behavior/asm.zig index e82242f425..992f18282e 100644 --- a/test/behavior/asm.zig +++ b/test/behavior/asm.zig @@ -178,7 +178,7 @@ test "rw constraint (x86_64)" { } test "asm modifiers (AArch64)" { - if (builtin.target.cpu.arch != .aarch64) return error.SkipZigTest; + if (!builtin.target.cpu.arch.isAARCH64()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows) return error.SkipZigTest; // MSVC doesn't support inline assembly diff --git a/test/behavior/call.zig b/test/behavior/call.zig index ed0e07a85b..c8239ac53e 100644 --- a/test/behavior/call.zig +++ b/test/behavior/call.zig @@ -660,6 +660,7 @@ test "arguments pointed to on stack into tailcall" { switch (builtin.cpu.arch) { .wasm32, + .wasm64, .mips, .mipsel, .mips64, diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 37db3ba941..84c634bb0e 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -124,7 +124,7 @@ test "@floatFromInt(f80)" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt 
!= .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -1362,7 +1362,7 @@ test "cast f16 to wider types" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; diff --git a/test/behavior/eval.zig b/test/behavior/eval.zig index dd3de9bb9f..4c67d29273 100644 --- a/test/behavior/eval.zig +++ b/test/behavior/eval.zig @@ -522,7 +522,7 @@ test "runtime 128 bit integer division" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 789eeaef66..ffd0310ab9 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -785,7 +785,7 @@ test "128-bit multiplication" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; { @@ -1374,7 +1374,7 @@ test "remainder division" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_llvm and builtin.os.tag == .windows) { @@ -1527,7 +1527,7 @@ test "@round f80" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return 
error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -1540,7 +1540,7 @@ test "@round f128" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; diff --git a/test/behavior/maximum_minimum.zig b/test/behavior/maximum_minimum.zig index d7d494a9ad..53b3d92406 100644 --- a/test/behavior/maximum_minimum.zig +++ b/test/behavior/maximum_minimum.zig @@ -122,7 +122,7 @@ test "@min/max for floats" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig index ec07f203ec..2bebdd30f0 100644 --- a/test/behavior/muladd.zig +++ b/test/behavior/muladd.zig @@ -58,7 +58,7 @@ test "@mulAdd f80" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -79,7 +79,7 @@ test "@mulAdd f128" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -189,7 +189,7 @@ test "vector f80" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) 
return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; try comptime vector80(); @@ -216,7 +216,7 @@ test "vector f128" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; try comptime vector128(); diff --git a/test/behavior/saturating_arithmetic.zig b/test/behavior/saturating_arithmetic.zig index ea3d51f3e6..d93899ad48 100644 --- a/test/behavior/saturating_arithmetic.zig +++ b/test/behavior/saturating_arithmetic.zig @@ -164,10 +164,10 @@ test "saturating multiplication <= 32 bits" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .wasm32) { + if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isWasm()) { // https://github.com/ziglang/zig/issues/9660 return error.SkipZigTest; } @@ -264,10 +264,10 @@ test "saturating multiplication" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .wasm32) { + if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch.isWasm()) { // https://github.com/ziglang/zig/issues/9660 return error.SkipZigTest; } @@ -311,7 +311,7 @@ test "saturating shift-left" { try testSatShl(i8, 127, 1, 127); try testSatShl(i8, -128, 1, -128); // TODO: remove this check once #9668 is completed - if (builtin.cpu.arch != .wasm32) { + if (!builtin.cpu.arch.isWasm()) { // skip testing ints > 64 bits on wasm due to miscompilation / wasmtime ci error try testSatShl(i128, maxInt(i128), 64, maxInt(i128)); try testSatShl(u128, maxInt(u128), 64, maxInt(u128)); diff --git a/test/behavior/struct.zig b/test/behavior/struct.zig index 74a77f3e21..95a31326ff 100644 --- a/test/behavior/struct.zig +++ b/test/behavior/struct.zig @@ -418,8 +418,8 @@ test "packed struct 24bits" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.cpu.arch == .wasm32) return error.SkipZigTest; // TODO - if (comptime builtin.cpu.arch.isArm()) return 
error.SkipZigTest; // TODO + if (builtin.cpu.arch.isWasm()) return error.SkipZigTest; // TODO + if (builtin.cpu.arch.isArm()) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -818,7 +818,7 @@ test "non-packed struct with u128 entry in union" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; const U = union(enum) { @@ -941,7 +941,7 @@ test "tuple assigned to variable" { test "comptime struct field" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (comptime builtin.cpu.arch.isArm()) return error.SkipZigTest; // TODO + if (builtin.cpu.arch.isArm()) return error.SkipZigTest; // TODO const T = struct { a: i32, diff --git a/test/behavior/var_args.zig b/test/behavior/var_args.zig index b5370b7813..c4b92f9473 100644 --- a/test/behavior/var_args.zig +++ b/test/behavior/var_args.zig @@ -100,7 +100,7 @@ test "simple variadic function" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.os.tag != .macos and comptime builtin.cpu.arch.isAARCH64()) { + if (builtin.os.tag != .macos and builtin.cpu.arch.isAARCH64()) { // https://github.com/ziglang/zig/issues/14096 return error.SkipZigTest; } @@ -161,7 +161,7 @@ test "coerce reference to var arg" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.os.tag != .macos and comptime builtin.cpu.arch.isAARCH64()) { + if (builtin.os.tag != .macos and builtin.cpu.arch.isAARCH64()) { // https://github.com/ziglang/zig/issues/14096 return error.SkipZigTest; } @@ -194,7 +194,7 @@ test "variadic functions" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.os.tag != .macos and comptime builtin.cpu.arch.isAARCH64()) { + if (builtin.os.tag != .macos and builtin.cpu.arch.isAARCH64()) { // https://github.com/ziglang/zig/issues/14096 return error.SkipZigTest; } @@ -239,7 +239,7 @@ test "copy VaList" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.os.tag != .macos and comptime builtin.cpu.arch.isAARCH64()) { + if (builtin.os.tag != .macos and builtin.cpu.arch.isAARCH64()) { // https://github.com/ziglang/zig/issues/14096 return error.SkipZigTest; } @@ -273,7 +273,7 @@ test "unused VaList arg" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == 
.stage2_riscv64) return error.SkipZigTest;
-    if (builtin.os.tag != .macos and comptime builtin.cpu.arch.isAARCH64()) {
+    if (builtin.os.tag != .macos and builtin.cpu.arch.isAARCH64()) {
         // https://github.com/ziglang/zig/issues/14096
         return error.SkipZigTest;
     }
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig
index 6b03ac90e3..6af4b5b4b6 100644
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@@ -101,7 +101,7 @@ test "vector float operators" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
 
     if (builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .aarch64) {
@@ -754,7 +754,7 @@ test "vector reduce operation" {
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArm()) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
     if (builtin.cpu.arch.isMIPS64()) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/21091
 
@@ -989,7 +989,7 @@ test "saturating multiplication" {
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     // TODO: once #9660 has been solved, remove this line
-    if (builtin.target.cpu.arch == .wasm32) return error.SkipZigTest;
+    if (builtin.target.cpu.arch.isWasm()) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
@@ -1256,7 +1256,7 @@ test "byte vector initialized in inline function" {
     if (builtin.cpu.arch == .aarch64_be and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest;
 
     if (comptime builtin.zig_backend == .stage2_llvm and builtin.cpu.arch == .x86_64 and
-        builtin.cpu.features.isEnabled(@intFromEnum(std.Target.x86.Feature.avx512f)))
+        std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f))
     {
         // TODO https://github.com/ziglang/zig/issues/13279
         return error.SkipZigTest;
diff --git a/test/behavior/wrapping_arithmetic.zig b/test/behavior/wrapping_arithmetic.zig
index 958be2f6f0..f1fbc0de51 100644
--- a/test/behavior/wrapping_arithmetic.zig
+++ b/test/behavior/wrapping_arithmetic.zig
@@ -83,7 +83,7 @@ test "wrapping multiplication" {
     if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
 
     // TODO: once #9660 has been solved, remove this line
-    if (builtin.cpu.arch == .wasm32) return error.SkipZigTest;
+    if (builtin.cpu.arch.isWasm()) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {

From df7661b8d31f2a49ea2679d6edc433cc1ab53e80 Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Thu, 2 Jan 2025 04:42:27 -0500
Subject: [PATCH 11/25] x86_64: optimize value copying slightly

---
 src/arch/x86_64/CodeGen.zig | 77 ++++++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 9 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 79e69c7c07..2dc25851df 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ 
b/src/arch/x86_64/CodeGen.zig @@ -327,6 +327,13 @@ pub const MCValue = union(enum) { }; } + fn isAddress(mcv: MCValue) bool { + return switch (mcv) { + .immediate, .register, .register_offset, .lea_frame => true, + else => false, + }; + } + fn address(mcv: MCValue) MCValue { return switch (mcv) { .none, @@ -23005,17 +23012,69 @@ fn genSetMem( try self.genSetMem(base, disp, ty, .{ .register = src_reg }, opts); }, - else => try self.genInlineMemcpy( - dst_ptr_mcv, - src_mcv.address(), - .{ .immediate = abi_size }, - ), + else => try self.genInlineMemcpy(dst_ptr_mcv, src_mcv.address(), .{ .immediate = abi_size }, .{ .no_alias = true }), }, .air_ref => |src_ref| try self.genSetMem(base, disp, ty, try self.resolveInst(src_ref), opts), } } -fn genInlineMemcpy(self: *CodeGen, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void { +fn genInlineMemcpy(self: *CodeGen, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue, opts: struct { + no_alias: bool, +}) InnerError!void { + if (opts.no_alias and dst_ptr.isAddress() and src_ptr.isAddress()) switch (len) { + else => {}, + .immediate => |len_imm| switch (len_imm) { + else => {}, + 1 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| { + try self.asmRegisterMemory(.{ ._, .mov }, reg.to8(), try src_ptr.deref().mem(self, .{ .size = .byte })); + try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .byte }), reg.to8()); + return; + }, + 2 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| { + try self.asmRegisterMemory(.{ ._, .mov }, reg.to16(), try src_ptr.deref().mem(self, .{ .size = .word })); + try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .word }), reg.to16()); + return; + }, + 4 => if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| { + try self.asmRegisterMemory(.{ ._, .mov }, reg.to32(), try src_ptr.deref().mem(self, .{ .size = .dword })); + try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .dword }), reg.to32()); + return; + }, + 8 => if (self.target.cpu.arch == .x86_64) { + if (self.register_manager.tryAllocReg(null, abi.RegisterClass.gp)) |reg| { + try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), try src_ptr.deref().mem(self, .{ .size = .qword })); + try self.asmMemoryRegister(.{ ._, .mov }, try dst_ptr.deref().mem(self, .{ .size = .qword }), reg.to64()); + return; + } + }, + 16 => if (self.hasFeature(.avx)) { + if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| { + try self.asmRegisterMemory(.{ .v_dqu, .mov }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword })); + try self.asmMemoryRegister(.{ .v_dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128()); + return; + } + } else if (self.hasFeature(.sse2)) { + if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| { + try self.asmRegisterMemory(.{ ._dqu, .mov }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword })); + try self.asmMemoryRegister(.{ ._dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128()); + return; + } + } else if (self.hasFeature(.sse)) { + if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| { + try self.asmRegisterMemory(.{ ._ps, .movu }, reg.to128(), try src_ptr.deref().mem(self, .{ .size = .xword })); + try self.asmMemoryRegister(.{ ._ps, .movu }, try dst_ptr.deref().mem(self, .{ .size = .xword }), reg.to128()); + return; + } + }, + 32 => if (self.hasFeature(.avx)) 
{ + if (self.register_manager.tryAllocReg(null, abi.RegisterClass.sse)) |reg| { + try self.asmRegisterMemory(.{ .v_dqu, .mov }, reg.to256(), try src_ptr.deref().mem(self, .{ .size = .yword })); + try self.asmMemoryRegister(.{ .v_dqu, .mov }, try dst_ptr.deref().mem(self, .{ .size = .yword }), reg.to256()); + return; + } + }, + }, + }; try self.spillRegisters(&.{ .rsi, .rdi, .rcx }); try self.genSetReg(.rsi, .usize, src_ptr, .{}); try self.genSetReg(.rdi, .usize, dst_ptr, .{}); @@ -24057,7 +24116,7 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { len_reg, .s(elem_abi_size), ); - try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, len_mcv); + try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, len_mcv, .{ .no_alias = false }); self.performReloc(skip_reloc); }, @@ -24082,7 +24141,7 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { } }, .{}); const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) }; - try self.genInlineMemcpy(second_elem_ptr_mcv, dst, bytes_to_copy); + try self.genInlineMemcpy(second_elem_ptr_mcv, dst, bytes_to_copy, .{ .no_alias = false }); }, .c, .many => unreachable, } @@ -24165,7 +24224,7 @@ fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void { else => src, }; - try self.genInlineMemcpy(dst_ptr, src_ptr, len); + try self.genInlineMemcpy(dst_ptr, src_ptr, len, .{ .no_alias = true }); return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); } From 7f22c41e09741de8cd7d87b938c0b15a9b4bd32e Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 2 Jan 2025 22:01:13 -0500 Subject: [PATCH 12/25] x86_64: add some ReleaseSmall support --- src/arch/x86_64/CodeGen.zig | 69 ++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 2dc25851df..93eb16f795 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -27209,14 +27209,57 @@ fn regExtraBits(self: *CodeGen, ty: Type) u64 { return self.regBitSize(ty) - ty.bitSize(self.pt.zcu); } -fn hasFeature(self: *CodeGen, feature: std.Target.x86.Feature) bool { - return std.Target.x86.featureSetHas(self.target.cpu.features, feature); -} -fn hasAnyFeatures(self: *CodeGen, features: anytype) bool { - return std.Target.x86.featureSetHasAny(self.target.cpu.features, features); -} -fn hasAllFeatures(self: *CodeGen, features: anytype) bool { - return std.Target.x86.featureSetHasAll(self.target.cpu.features, features); +fn hasFeature(cg: *CodeGen, feature: std.Target.x86.Feature) bool { + return switch (feature) { + .@"64bit" => switch (cg.target.cpu.arch) { + else => unreachable, + .x86 => false, + .x86_64 => true, + }, + .false_deps_getmant, + .false_deps_lzcnt_tzcnt, + .false_deps_mulc, + .false_deps_mullq, + .false_deps_perm, + .false_deps_popcnt, + .false_deps_range, + .slow_3ops_lea, + .slow_incdec, + .slow_lea, + .slow_pmaddwd, + .slow_pmulld, + .slow_shld, + .slow_two_mem_ops, + .slow_unaligned_mem_16, + .slow_unaligned_mem_32, + => switch (cg.mod.optimize_mode) { + .Debug, .ReleaseSafe, .ReleaseFast => null, + .ReleaseSmall => false, + }, + .fast_11bytenop, + .fast_15bytenop, + .fast_7bytenop, + .fast_bextr, + .fast_dpwssd, + .fast_gather, + .fast_hops, + .fast_imm16, + .fast_lzcnt, + .fast_movbe, + .fast_scalar_fsqrt, + .fast_scalar_shift_masks, + .fast_shld_rotate, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fast_vector_shift_masks, + => switch (cg.mod.optimize_mode) 
{ + .Debug, .ReleaseSafe, .ReleaseFast => null, + .ReleaseSmall => true, + }, + .mmx => false, + else => null, + } orelse std.Target.x86.featureSetHas(cg.target.cpu.features, feature); } fn typeOf(self: *CodeGen, inst: Air.Inst.Ref) Type { @@ -29090,15 +29133,7 @@ fn select( cases: []const Select.Case, ) !void { cases: for (cases) |case| { - for (case.required_features) |required_feature| if (required_feature) |feature| if (!switch (feature) { - .@"64bit" => switch (cg.target.cpu.arch) { - else => unreachable, - .x86 => false, - .x86_64 => true, - }, - .mmx => false, - else => cg.hasFeature(feature), - }) continue :cases; + for (case.required_features) |required_feature| if (required_feature) |feature| if (!cg.hasFeature(feature)) continue :cases; for (case.dst_constraints[0..dst_temps.len], dst_tys) |dst_constraint, dst_ty| if (!dst_constraint.accepts(dst_ty, cg)) continue :cases; for (case.src_constraints[0..src_temps.len], src_temps) |src_constraint, src_temp| if (!src_constraint.accepts(src_temp.typeOf(cg), cg)) continue :cases; if (std.debug.runtime_safety) { From 3c74a478a41d2f806f3b9b2bb86fffbc40e100c3 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 2 Jan 2025 23:20:53 -0500 Subject: [PATCH 13/25] x86_64: fix unnecessary register saving --- src/arch/x86_64/CodeGen.zig | 43 +++++++++++++++---------------------- src/arch/x86_64/Mir.zig | 4 +++- src/register_manager.zig | 7 +++--- 3 files changed, 23 insertions(+), 31 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 93eb16f795..cb7be5cfc8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -9731,7 +9731,7 @@ fn computeFrameLayout(self: *CodeGen, cc: std.builtin.CallingConvention) !FrameL // Create list of registers to save in the prologue. // TODO handle register classes - var save_reg_list = Mir.RegisterList{}; + var save_reg_list: Mir.RegisterList = .empty; const callee_preserved_regs = abi.getCalleePreservedRegs(abi.resolveCallingConvention(cc, self.target.*)); for (callee_preserved_regs) |reg| { @@ -9972,43 +9972,34 @@ fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, co reg_locks.deinit(); }; - for (0..state.registers.len) |index| { - const current_maybe_inst = if (self.register_manager.free_registers.isSet(index)) - null - else - self.register_manager.registers[index]; - const target_maybe_inst = if (state.free_registers.isSet(index)) - null - else - state.registers[index]; + for ( + 0.., + self.register_manager.registers, + state.registers, + state.reg_tracking, + ) |reg_i, current_slot, target_slot, reg_tracking| { + const reg_index: RegisterManager.TrackedIndex = @intCast(reg_i); + const current_maybe_inst = if (self.register_manager.isRegIndexFree(reg_index)) null else current_slot; + const target_maybe_inst = if (state.free_registers.isSet(reg_index)) null else target_slot; if (std.debug.runtime_safety) if (target_maybe_inst) |target_inst| assert(self.inst_tracking.getIndex(target_inst).? 
< state.inst_tracking_len); if (opts.emit_instructions) { - if (current_maybe_inst) |current_inst| { + if (current_maybe_inst) |current_inst| try self.inst_tracking.getPtr(current_inst).?.spill(self, current_inst); - } - if (target_maybe_inst) |target_inst| { - const target_tracking = self.inst_tracking.getPtr(target_inst).?; - try target_tracking.materialize(self, target_inst, state.reg_tracking[index]); - } + if (target_maybe_inst) |target_inst| + try self.inst_tracking.getPtr(target_inst).?.materialize(self, target_inst, reg_tracking); } if (opts.update_tracking) { if (current_maybe_inst) |current_inst| { try self.inst_tracking.getPtr(current_inst).?.trackSpill(self, current_inst); - } - { - const reg = RegisterManager.regAtTrackedIndex(@intCast(index)); - self.register_manager.freeReg(reg); - self.register_manager.getRegAssumeFree(reg, target_maybe_inst); + self.register_manager.freeRegIndex(reg_index); } if (target_maybe_inst) |target_inst| { - self.inst_tracking.getPtr(target_inst).?.trackMaterialize( - target_inst, - state.reg_tracking[index], - ); + self.register_manager.getRegIndexAssumeFree(reg_index, target_maybe_inst); + self.inst_tracking.getPtr(target_inst).?.trackMaterialize(target_inst, reg_tracking); } } else if (target_maybe_inst) |_| - try reg_locks.append(self.register_manager.lockRegIndexAssumeUnused(@intCast(index))); + try reg_locks.append(self.register_manager.lockRegIndexAssumeUnused(reg_index)); } if (opts.emit_instructions) if (self.eflags_inst) |inst| try self.inst_tracking.getPtr(inst).?.spill(self, inst); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 595f79e8dd..10e6dc4618 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -1168,11 +1168,13 @@ pub const AirOffset = struct { air_inst: Air.Inst.Index, off: i32 }; /// Used in conjunction with payload to transfer a list of used registers in a compact manner. pub const RegisterList = struct { - bitset: BitSet = BitSet.initEmpty(), + bitset: BitSet, const BitSet = IntegerBitSet(32); const Self = @This(); + pub const empty: RegisterList = .{ .bitset = .initEmpty() }; + fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt { for (registers, 0..) |cpreg, i| { if (reg.id() == cpreg.id()) return @intCast(i); diff --git a/src/register_manager.zig b/src/register_manager.zig index 4176dd7d83..b9d3b6db0b 100644 --- a/src/register_manager.zig +++ b/src/register_manager.zig @@ -99,8 +99,7 @@ pub fn RegisterManager( max_id = @max(elem_id, max_id); } - const OptionalIndex = std.math.IntFittingRange(0, set.len); - comptime var map = [1]OptionalIndex{set.len} ** (max_id - min_id + 1); + comptime var map: [max_id - min_id + 1]std.math.IntFittingRange(0, set.len) = @splat(set.len); inline for (set, 0..) |elem, elem_index| map[comptime elem.id() - min_id] = elem_index; const id_index = reg.id() -% min_id; @@ -384,7 +383,7 @@ pub fn RegisterManager( /// Allocates the specified register with the specified /// instruction. Asserts that the register is free and no /// spilling is necessary. 
-        fn getRegIndexAssumeFree(
+        pub fn getRegIndexAssumeFree(
             self: *Self,
             tracked_index: TrackedIndex,
             inst: ?Air.Inst.Index,
@@ -403,7 +402,7 @@ pub fn RegisterManager(
         }
 
         /// Marks the specified register as free
-        fn freeRegIndex(self: *Self, tracked_index: TrackedIndex) void {
+        pub fn freeRegIndex(self: *Self, tracked_index: TrackedIndex) void {
             log.debug("freeing register {}", .{regAtTrackedIndex(tracked_index)});
             self.registers[tracked_index] = undefined;
             self.markRegIndexFree(tracked_index);

From 074232b3e53f6a6bbf0851a97c733a848e739bd8 Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Fri, 3 Jan 2025 01:48:42 -0500
Subject: [PATCH 14/25] x86_64: implement a custom calling convention for the Zig language

---
 src/Type.zig                |  11 ++
 src/arch/x86_64/CodeGen.zig | 260 +++++++++++++++++++++---------------
 src/arch/x86_64/Mir.zig     |   8 +-
 src/arch/x86_64/abi.zig     |  83 +++++++++---
 4 files changed, 231 insertions(+), 131 deletions(-)

diff --git a/src/Type.zig b/src/Type.zig
index 2980034a34..c6c4334270 100644
--- a/src/Type.zig
+++ b/src/Type.zig
@@ -1920,6 +1920,17 @@ pub fn isSlice(ty: Type, zcu: *const Zcu) bool {
     };
 }
 
+pub fn isSliceAtRuntime(ty: Type, zcu: *const Zcu) bool {
+    return switch (zcu.intern_pool.indexToKey(ty.toIntern())) {
+        .ptr_type => |ptr_type| ptr_type.flags.size == .slice,
+        .opt_type => |child| switch (zcu.intern_pool.indexToKey(child)) {
+            .ptr_type => |ptr_type| !ptr_type.flags.is_allowzero and ptr_type.flags.size == .slice,
+            else => false,
+        },
+        else => false,
+    };
+}
+
 pub fn slicePtrFieldType(ty: Type, zcu: *const Zcu) Type {
     return Type.fromInterned(zcu.intern_pool.slicePtrType(ty.toIntern()));
 }
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index cb7be5cfc8..369dfe2d9f 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -952,7 +952,6 @@ pub fn generate(
     );
     const fn_info = zcu.typeToFunc(fn_type).?;
-    const cc = abi.resolveCallingConvention(fn_info.cc, function.target.*);
     var call_info = function.resolveCallingConventionValues(fn_info, &.{}, .args_frame) catch |err| switch (err) {
         error.CodegenFail => return error.CodegenFail,
         else => |e| return e,
     };
@@ -978,7 +977,7 @@ pub fn generate(
             .alignment = call_info.stack_align,
         }),
     );
-    function.va_info = switch (cc) {
+    function.va_info = switch (fn_info.cc) {
         else => undefined,
         .x86_64_sysv => .{ .sysv = .{
             .gp_count = call_info.gp_count,
@@ -1010,7 +1009,7 @@ pub fn generate(
         .target = function.target,
         .allocator = gpa,
         .mir = mir,
-        .cc = cc,
+        .cc = fn_info.cc,
         .src_loc = src_loc,
         .output_mode = comp.config.output_mode,
         .link_mode = comp.config.link_mode,
@@ -1100,7 +1099,7 @@ pub fn generateLazy(
        .target = function.target,
        .allocator = gpa,
        .mir = mir,
-       .cc = abi.resolveCallingConvention(.auto, function.target.*),
+       .cc = .auto,
        .src_loc = src_loc,
        .output_mode = comp.config.output_mode,
        .link_mode = comp.config.link_mode,
@@ -2126,8 +2125,7 @@ fn gen(self: *CodeGen) InnerError!void {
     const pt = self.pt;
     const zcu = pt.zcu;
     const fn_info = zcu.typeToFunc(self.fn_type).?;
-    const cc = abi.resolveCallingConvention(fn_info.cc, self.target.*);
-    if (cc != .naked) {
+    if (fn_info.cc != .naked) {
         try self.asmRegister(.{ ._, .push }, .rbp);
         try self.asmPseudoImmediate(.pseudo_cfi_adjust_cfa_offset_i_s, .s(8));
         try self.asmPseudoRegisterImmediate(.pseudo_cfi_rel_offset_ri_s, .rbp, .s(0));
@@ -2159,7 +2157,7 @@ fn gen(self: *CodeGen) InnerError!void {
         else => unreachable,
     }
 
-    if (fn_info.is_var_args) switch (cc) {
+    if (fn_info.is_var_args) switch (fn_info.cc) {
        .x86_64_sysv
=> { const info = &self.va_info.sysv; const reg_save_area_fi = try self.allocFrameIndex(.init(.{ @@ -2188,7 +2186,7 @@ fn gen(self: *CodeGen) InnerError!void { self.performReloc(skip_sse_reloc); }, .x86_64_win => return self.fail("TODO implement gen var arg function for Win64", .{}), - else => unreachable, + else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}), }; try self.asmPseudo(.pseudo_dbg_prologue_end_none); @@ -2214,7 +2212,7 @@ fn gen(self: *CodeGen) InnerError!void { try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8)); try self.asmOpOnly(.{ ._, .ret }); - const frame_layout = try self.computeFrameLayout(cc); + const frame_layout = try self.computeFrameLayout(fn_info.cc); const need_frame_align = frame_layout.stack_mask != std.math.maxInt(u32); const need_stack_adjust = frame_layout.stack_adjust > 0; const need_save_reg = frame_layout.save_reg_list.count() > 0; @@ -2262,12 +2260,13 @@ fn gen(self: *CodeGen) InnerError!void { } }, }); } else { + const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc); self.mir_instructions.set(backpatch_stack_alloc, .{ .tag = .pseudo, .ops = .pseudo_probe_adjust_setup_rri_s, .data = .{ .rri = .{ .r1 = .rsp, - .r2 = .rax, + .r2 = scratch_reg, .i = frame_layout.stack_adjust, } }, }); @@ -2276,25 +2275,35 @@ fn gen(self: *CodeGen) InnerError!void { .ops = .pseudo_probe_adjust_loop_rr, .data = .{ .rr = .{ .r1 = .rsp, - .r2 = .rax, + .r2 = scratch_reg, } }, }); } } if (need_frame_align or need_stack_adjust) { - self.mir_instructions.set(backpatch_stack_dealloc, .{ - .tag = .lea, - .ops = .rm, - .data = .{ .rx = .{ - .r1 = .rsp, - .payload = try self.addExtra(Mir.Memory.encode(.{ - .base = .{ .reg = .rbp }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = -frame_layout.save_reg_list.size(), - } }, - })), - } }, + self.mir_instructions.set(backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) { + 0 => .{ + .tag = .mov, + .ops = .rr, + .data = .{ .rr = .{ + .r1 = .rsp, + .r2 = .rbp, + } }, + }, + else => |disp| .{ + .tag = .lea, + .ops = .rm, + .data = .{ .rx = .{ + .r1 = .rsp, + .payload = try self.addExtra(Mir.Memory.encode(.{ + .base = .{ .reg = .rbp }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = disp, + } }, + })), + } }, + }, }); } if (need_save_reg) { @@ -9552,8 +9561,7 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { const enum_ty: Type = .fromInterned(lazy_sym.ty); wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(pt)}); - const resolved_cc = abi.resolveCallingConvention(.auto, self.target.*); - const param_regs = abi.getCAbiIntParamRegs(resolved_cc); + const param_regs = abi.getCAbiIntParamRegs(.auto); const param_locks = self.register_manager.lockRegsAssumeUnused(2, param_regs[0..2].*); defer for (param_locks) |lock| self.register_manager.unlockReg(lock); @@ -9700,7 +9708,7 @@ fn setFrameLoc( offset.* += self.frame_allocs.items(.abi_size)[frame_i]; } -fn computeFrameLayout(self: *CodeGen, cc: std.builtin.CallingConvention) !FrameLayout { +fn computeFrameLayout(self: *CodeGen, cc: std.builtin.CallingConvention.Tag) !FrameLayout { const frame_allocs_len = self.frame_allocs.len; try self.frame_locs.resize(self.gpa, frame_allocs_len); const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count); @@ -9732,8 +9740,7 @@ fn computeFrameLayout(self: *CodeGen, cc: std.builtin.CallingConvention) !FrameL // Create list of registers to save in the prologue. 
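// [editor's note, not part of the patch] The save list built below records which
// callee-preserved registers the function actually used, as a compact bitset keyed by
// each register's position in the ABI's callee-preserved array; its size() feeds the
// frame layout for the prologue spill area. A minimal stand-alone sketch of that idea,
// assuming std.bit_set; SaveList, push, and frameSize are invented names, not the
// compiler's own types:
const std = @import("std");

const SaveList = struct {
    bits: std.bit_set.IntegerBitSet(32) = .initEmpty(),

    // Record that `reg` (which must be a member of `callee_preserved`) needs saving.
    fn push(list: *SaveList, callee_preserved: []const u8, reg: u8) void {
        for (callee_preserved, 0..) |candidate, i| {
            if (candidate == reg) return list.bits.set(i);
        }
        unreachable; // only callee-preserved registers belong in the save list
    }

    // Stack bytes occupied by the saved registers (8 per register on x86_64, 4 on x86).
    fn frameSize(list: SaveList, reg_size: u8) i32 {
        return @intCast(list.bits.count() * reg_size);
    }
};

test "save list sketch" {
    const callee_preserved = [_]u8{ 3, 12, 13, 14, 15 }; // register ids, purely illustrative
    var list: SaveList = .{};
    list.push(&callee_preserved, 13);
    list.push(&callee_preserved, 3);
    try std.testing.expectEqual(@as(i32, 16), list.frameSize(8));
}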
// TODO handle register classes var save_reg_list: Mir.RegisterList = .empty; - const callee_preserved_regs = - abi.getCalleePreservedRegs(abi.resolveCallingConvention(cc, self.target.*)); + const callee_preserved_regs = abi.getCalleePreservedRegs(cc); for (callee_preserved_regs) |reg| { if (self.register_manager.isRegAllocated(reg)) { save_reg_list.push(callee_preserved_regs, reg); @@ -9747,7 +9754,7 @@ fn computeFrameLayout(self: *CodeGen, cc: std.builtin.CallingConvention) !FrameL const stack_frame_align_offset = if (need_align_stack) 0 else - save_reg_list.size() + frame_offset[@intFromEnum(FrameIndex.args_frame)]; + save_reg_list.size(self.target) + frame_offset[@intFromEnum(FrameIndex.args_frame)]; var rsp_offset: i32 = 0; self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true); @@ -10036,10 +10043,9 @@ pub fn spillEflagsIfOccupied(self: *CodeGen) !void { } } -pub fn spillCallerPreservedRegs(self: *CodeGen, cc: std.builtin.CallingConvention) !void { +pub fn spillCallerPreservedRegs(self: *CodeGen, cc: std.builtin.CallingConvention.Tag) !void { switch (cc) { - .x86_64_sysv => try self.spillRegisters(abi.getCallerPreservedRegs(.{ .x86_64_sysv = .{} })), - .x86_64_win => try self.spillRegisters(abi.getCallerPreservedRegs(.{ .x86_64_win = .{} })), + inline .auto, .x86_64_sysv, .x86_64_win => |tag| try self.spillRegisters(abi.getCallerPreservedRegs(tag)), else => unreachable, } } @@ -19127,8 +19133,7 @@ fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void { try self.spillEflagsIfOccupied(); const fn_info = zcu.typeToFunc(self.fn_type).?; - const cc = abi.resolveCallingConvention(fn_info.cc, self.target.*); - const param_int_regs = abi.getCAbiIntParamRegs(cc); + const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc); var prev_reg: Register = undefined; for ( param_int_regs[param_int_regs.len - regs_frame_addr.regs ..], @@ -19228,8 +19233,7 @@ fn airDbgArg(self: *CodeGen, inst: Air.Inst.Index) !void { } fn airDbgVarArgs(self: *CodeGen) !void { - if (self.pt.zcu.typeToFunc(self.fn_type).?.is_var_args) - try self.asmPseudo(.pseudo_dbg_var_args_none); + if (self.pt.zcu.typeToFunc(self.fn_type).?.is_var_args) try self.asmPseudo(.pseudo_dbg_var_args_none); } fn genLocalDebugInfo( @@ -19364,7 +19368,6 @@ fn genCall(self: *CodeGen, info: union(enum) { }), }; const fn_info = zcu.typeToFunc(fn_ty).?; - const resolved_cc = abi.resolveCallingConvention(fn_info.cc, self.target.*); const ExpectedContents = extern struct { var_args: [16][@sizeOf(Type)]u8 align(@alignOf(Type)), @@ -19406,7 +19409,7 @@ fn genCall(self: *CodeGen, info: union(enum) { } try self.spillEflagsIfOccupied(); - try self.spillCallerPreservedRegs(resolved_cc); + try self.spillCallerPreservedRegs(fn_info.cc); // set stack arguments first because this can clobber registers // also clobber spill arguments as we go @@ -19482,7 +19485,7 @@ fn genCall(self: *CodeGen, info: union(enum) { ); _ = try self.asmJccReloc(.b, loop); - const param_int_regs = abi.getCAbiIntParamRegs(resolved_cc); + const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc); for (param_int_regs[param_int_regs.len - regs_frame_addr.regs ..]) |dst_reg| { try self.register_manager.getReg(dst_reg, null); try reg_locks.append(self.register_manager.lockReg(dst_reg)); @@ -19546,7 +19549,7 @@ fn genCall(self: *CodeGen, info: union(enum) { }; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - const param_int_regs = abi.getCAbiIntParamRegs(resolved_cc); + const param_int_regs = abi.getCAbiIntParamRegs(fn_info.cc); for ( param_int_regs[param_int_regs.len - 
regs_frame_addr.regs ..], 0.., @@ -19559,8 +19562,7 @@ fn genCall(self: *CodeGen, info: union(enum) { else => unreachable, }; - if (fn_info.is_var_args) - try self.asmRegisterImmediate(.{ ._, .mov }, .al, .u(call_info.fp_count)); + if (fn_info.is_var_args) try self.asmRegisterImmediate(.{ ._, .mov }, .al, .u(call_info.fp_count)); // Due to incremental compilation, how function calls are generated depends // on linking. @@ -19582,8 +19584,9 @@ fn genCall(self: *CodeGen, info: union(enum) { } else if (self.bin_file.cast(.coff)) |coff_file| { const atom = try coff_file.getOrCreateAtomForNav(func.owner_nav); const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; - try self.genSetReg(.rax, .usize, .{ .lea_got = sym_index }, .{}); - try self.asmRegister(.{ ._, .call }, .rax); + const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc); + try self.genSetReg(scratch_reg, .usize, .{ .lea_got = sym_index }, .{}); + try self.asmRegister(.{ ._, .call }, scratch_reg); } else if (self.bin_file.cast(.macho)) |macho_file| { const zo = macho_file.getZigObject().?; const sym_index = try zo.getOrCreateMetadataForNav(macho_file, func.owner_nav); @@ -19622,8 +19625,9 @@ fn genCall(self: *CodeGen, info: union(enum) { } } else { assert(self.typeOf(callee).zigTypeTag(zcu) == .pointer); - try self.genSetReg(.rax, .usize, .{ .air_ref = callee }, .{}); - try self.asmRegister(.{ ._, .call }, .rax); + const scratch_reg = abi.getCAbiLinkerScratchReg(fn_info.cc); + try self.genSetReg(scratch_reg, .usize, .{ .air_ref = callee }, .{}); + try self.asmRegister(.{ ._, .call }, scratch_reg); }, .lib => |lib| if (self.bin_file.cast(.elf)) |elf_file| { const target_sym_index = try elf_file.getGlobalSymbol(lib.callee, lib.lib); @@ -23095,11 +23099,12 @@ fn genExternSymbolRef( ) InnerError!void { if (self.bin_file.cast(.coff)) |coff_file| { const global_index = try coff_file.getGlobalSymbol(callee, lib); + const scratch_reg = abi.getCAbiLinkerScratchReg(self.fn_type.fnCallingConvention(self.pt.zcu)); _ = try self.addInst(.{ .tag = .mov, .ops = .import_reloc, .data = .{ .rx = .{ - .r1 = .rax, + .r1 = scratch_reg, .payload = try self.addExtra(bits.SymbolOffset{ .sym_index = link.File.Coff.global_symbol_bit | global_index, }), @@ -23107,7 +23112,7 @@ fn genExternSymbolRef( }); switch (tag) { .mov => {}, - .call => try self.asmRegister(.{ ._, .call }, .rax), + .call => try self.asmRegister(.{ ._, .call }, scratch_reg), else => unreachable, } } else return self.fail("TODO implement calling extern functions", .{}); @@ -24226,7 +24231,6 @@ fn airTagName(self: *CodeGen, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; const inst_ty = self.typeOfIndex(inst); const enum_ty = self.typeOf(un_op); - const resolved_cc = abi.resolveCallingConvention(.auto, self.target.*); // We need a properly aligned and sized call frame to be able to call this function. 
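// [editor's note, not part of the patch] For orientation: the lazy symbol called below is
// the backend-generated @tagName helper for the enum type (see genLazy earlier in this
// patch). The real helper is emitted directly as machine code, so the Zig function below
// is only a rough model of what it computes; tagNameSketch is an invented name.
const std = @import("std");

fn tagNameSketch(comptime E: type, value: E) []const u8 {
    return switch (value) {
        inline else => |tag| @tagName(tag),
    };
}

test "tagName sketch" {
    const Backend = enum { stage2_x86_64, stage2_llvm };
    try std.testing.expectEqualStrings("stage2_llvm", tagNameSketch(Backend, .stage2_llvm));
}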
{ @@ -24244,9 +24248,9 @@ fn airTagName(self: *CodeGen, inst: Air.Inst.Index) !void { } try self.spillEflagsIfOccupied(); - try self.spillCallerPreservedRegs(resolved_cc); + try self.spillCallerPreservedRegs(.auto); - const param_regs = abi.getCAbiIntParamRegs(resolved_cc); + const param_regs = abi.getCAbiIntParamRegs(.auto); const dst_mcv = try self.allocRegOrMem(inst, false); try self.genSetReg(param_regs[0], .usize, dst_mcv.address(), .{}); @@ -24255,7 +24259,7 @@ fn airTagName(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetReg(param_regs[1], enum_ty, operand, .{}); const enum_lazy_sym: link.File.LazySymbol = .{ .kind = .code, .ty = enum_ty.toIntern() }; - try self.genLazySymbolRef(.call, .rax, enum_lazy_sym); + try self.genLazySymbolRef(.call, abi.getCAbiLinkerScratchReg(self.fn_type.fnCallingConvention(zcu)), enum_lazy_sym); return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } @@ -26349,10 +26353,7 @@ fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void { const va_list_ty = self.air.instructions.items(.data)[@intFromEnum(inst)].ty; const ptr_anyopaque_ty = try pt.singleMutPtrType(.anyopaque); - const result: MCValue = switch (abi.resolveCallingConvention( - self.fn_type.fnCallingConvention(zcu), - self.target.*, - )) { + const result: MCValue = switch (self.fn_type.fnCallingConvention(zcu)) { .x86_64_sysv => result: { const info = self.va_info.sysv; const dst_fi = try self.allocFrameIndex(.initSpill(va_list_ty, zcu)); @@ -26396,7 +26397,7 @@ fn airVaStart(self: *CodeGen, inst: Air.Inst.Index) !void { break :result .{ .load_frame = .{ .index = dst_fi } }; }, .x86_64_win => return self.fail("TODO implement c_va_start for Win64", .{}), - else => unreachable, + else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}), }; return self.finishAir(inst, result, .{ .none, .none, .none }); } @@ -26410,10 +26411,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { const ptr_anyopaque_ty = try pt.singleMutPtrType(.anyopaque); const unused = self.liveness.isUnused(inst); - const result: MCValue = switch (abi.resolveCallingConvention( - self.fn_type.fnCallingConvention(zcu), - self.target.*, - )) { + const result: MCValue = switch (self.fn_type.fnCallingConvention(zcu)) { .x86_64_sysv => result: { try self.spillEflagsIfOccupied(); @@ -26592,7 +26590,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { break :result promote_mcv; }, .x86_64_win => return self.fail("TODO implement c_va_arg for Win64", .{}), - else => unreachable, + else => |cc| return self.fail("{s} does not support var args", .{@tagName(cc)}), }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -26753,13 +26751,15 @@ fn resolveCallingConventionValues( errdefer self.gpa.free(result.args); const ret_ty: Type = .fromInterned(fn_info.return_type); - - const resolved_cc = abi.resolveCallingConvention(cc, self.target.*); switch (cc) { .naked => { assert(result.args.len == 0); result.return_value = .init(.unreach); - result.stack_align = .@"8"; + result.stack_align = switch (self.target.cpu.arch) { + else => unreachable, + .x86 => .@"4", + .x86_64 => .@"8", + }; }, .x86_64_sysv, .x86_64_win => |cc_opts| { var ret_int_reg_i: u32 = 0; @@ -26768,17 +26768,14 @@ fn resolveCallingConventionValues( var param_sse_reg_i: u32 = 0; result.stack_align = .fromByteUnits(cc_opts.incoming_stack_alignment orelse 16); - switch (resolved_cc) { + switch (cc) { .x86_64_sysv => {}, - .x86_64_win => { - // Align the stack to 16bytes before allocating shadow stack 
space (if any). - result.stack_byte_count += @intCast(4 * Type.usize.abiSize(zcu)); - }, + .x86_64_win => result.stack_byte_count += @intCast(4 * 8), else => unreachable, } // Return values - if (ret_ty.zigTypeTag(zcu) == .noreturn) { + if (ret_ty.isNoReturn(zcu)) { result.return_value = .init(.unreach); } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) { // TODO: is this even possible for C calling convention? @@ -26787,7 +26784,7 @@ fn resolveCallingConventionValues( var ret_tracking: [4]InstTracking = undefined; var ret_tracking_i: usize = 0; - const classes = switch (resolved_cc) { + const classes = switch (cc) { .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, self.target.*, .ret), .none), .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu)}, else => unreachable, @@ -26795,7 +26792,7 @@ fn resolveCallingConventionValues( for (classes) |class| switch (class) { .integer => { const ret_int_reg = registerAlias( - abi.getCAbiIntReturnRegs(resolved_cc)[ret_int_reg_i], + abi.getCAbiIntReturnRegs(cc)[ret_int_reg_i], @intCast(@min(ret_ty.abiSize(zcu), 8)), ); ret_int_reg_i += 1; @@ -26804,7 +26801,7 @@ fn resolveCallingConventionValues( ret_tracking_i += 1; }, .sse, .float, .float_combine, .win_i128 => { - const ret_sse_regs = abi.getCAbiSseReturnRegs(resolved_cc); + const ret_sse_regs = abi.getCAbiSseReturnRegs(cc); const abi_size: u32 = @intCast(ret_ty.abiSize(zcu)); const reg_size = @min(abi_size, self.vectorSize(.float)); var byte_offset: u32 = 0; @@ -26818,18 +26815,18 @@ fn resolveCallingConventionValues( }, .sseup => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .sse), .x87 => { - ret_tracking[ret_tracking_i] = .init(.{ .register = .st0 }); + ret_tracking[ret_tracking_i] = .init(.{ .register = abi.getCAbiX87ReturnRegs(cc)[0] }); ret_tracking_i += 1; }, .x87up => assert(ret_tracking[ret_tracking_i - 1].short.register.class() == .x87), .complex_x87 => { - ret_tracking[ret_tracking_i] = .init(.{ .register_pair = .{ .st0, .st1 } }); + ret_tracking[ret_tracking_i] = .init(.{ .register_pair = abi.getCAbiX87ReturnRegs(cc)[0..2].* }); ret_tracking_i += 1; }, .memory => { - const ret_int_reg = abi.getCAbiIntReturnRegs(resolved_cc)[ret_int_reg_i].to64(); + const ret_int_reg = abi.getCAbiIntReturnRegs(cc)[ret_int_reg_i].to64(); ret_int_reg_i += 1; - const ret_indirect_reg = abi.getCAbiIntParamRegs(resolved_cc)[param_int_reg_i]; + const ret_indirect_reg = abi.getCAbiIntParamRegs(cc)[param_int_reg_i]; param_int_reg_i += 1; ret_tracking[ret_tracking_i] = .{ @@ -26864,7 +26861,7 @@ fn resolveCallingConventionValues( // Input params for (param_types, result.args) |ty, *arg| { assert(ty.hasRuntimeBitsIgnoreComptime(zcu)); - switch (resolved_cc) { + switch (cc) { .x86_64_sysv => {}, .x86_64_win => { param_int_reg_i = @max(param_int_reg_i, param_sse_reg_i); @@ -26876,14 +26873,14 @@ fn resolveCallingConventionValues( var arg_mcv: [4]MCValue = undefined; var arg_mcv_i: usize = 0; - const classes = switch (resolved_cc) { + const classes = switch (cc) { .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, self.target.*, .arg), .none), .x86_64_win => &.{abi.classifyWindows(ty, zcu)}, else => unreachable, }; classes: for (classes) |class| switch (class) { .integer => { - const param_int_regs = abi.getCAbiIntParamRegs(resolved_cc); + const param_int_regs = abi.getCAbiIntParamRegs(cc); if (param_int_reg_i >= param_int_regs.len) break; const param_int_reg = @@ -26894,7 +26891,7 @@ fn resolveCallingConventionValues( arg_mcv_i += 1; }, .sse, .float, .float_combine => 
{ - const param_sse_regs = abi.getCAbiSseParamRegs(resolved_cc); + const param_sse_regs = abi.getCAbiSseParamRegs(cc); const abi_size: u32 = @intCast(ty.abiSize(zcu)); const reg_size = @min(abi_size, self.vectorSize(.float)); var byte_offset: u32 = 0; @@ -26909,14 +26906,13 @@ fn resolveCallingConventionValues( } }, .sseup => assert(arg_mcv[arg_mcv_i - 1].register.class() == .sse), - .x87, .x87up, .complex_x87, .memory, .win_i128 => switch (resolved_cc) { + .x87, .x87up, .complex_x87, .memory, .win_i128 => switch (cc) { .x86_64_sysv => switch (class) { .x87, .x87up, .complex_x87, .memory => break, else => unreachable, }, .x86_64_win => if (ty.abiSize(zcu) > 8) { - const param_int_reg = - abi.getCAbiIntParamRegs(resolved_cc)[param_int_reg_i].to64(); + const param_int_reg = abi.getCAbiIntParamRegs(cc)[param_int_reg_i].to64(); param_int_reg_i += 1; arg_mcv[arg_mcv_i] = .{ .indirect = .{ .reg = param_int_reg } }; @@ -26927,7 +26923,7 @@ fn resolveCallingConventionValues( .none => unreachable, .integer_per_element => { const param_int_regs_len: u32 = - @intCast(abi.getCAbiIntParamRegs(resolved_cc).len); + @intCast(abi.getCAbiIntParamRegs(cc).len); const remaining_param_int_regs: u3 = @intCast(param_int_regs_len - param_int_reg_i); param_int_reg_i = param_int_regs_len; @@ -26989,42 +26985,86 @@ fn resolveCallingConventionValues( result.fp_count = param_sse_reg_i; }, .auto => { - result.stack_align = .@"16"; + result.stack_align = abi.zigcc.stack_align orelse .fromByteUnits(self.vectorSize(.float)); + + var param_gpr = abi.getCAbiIntParamRegs(cc); + var param_x87 = abi.getCAbiX87ParamRegs(cc); + var param_sse = abi.getCAbiSseParamRegs(cc); // Return values - if (ret_ty.zigTypeTag(zcu) == .noreturn) { - result.return_value = .init(.unreach); - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) { - result.return_value = .init(.none); - } else { - const ret_reg = abi.getCAbiIntReturnRegs(resolved_cc)[0]; - const ret_ty_size: u31 = @intCast(ret_ty.abiSize(zcu)); - if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) { - const aliased_reg = registerAlias(ret_reg, ret_ty_size); - result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none }; - } else { - const ret_indirect_reg = abi.getCAbiIntParamRegs(resolved_cc)[0]; - result.return_value = .{ - .short = .{ .indirect = .{ .reg = ret_reg } }, - .long = .{ .indirect = .{ .reg = ret_indirect_reg } }, - }; - } - } + result.return_value = if (ret_ty.isNoReturn(zcu)) + .init(.unreach) + else if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) + .init(.none) + else return_value: { + const ret_gpr = abi.getCAbiIntReturnRegs(cc); + const ret_size: u31 = @intCast(ret_ty.abiSize(zcu)); + if (abi.zigcc.return_in_regs) switch (self.regClassForType(ret_ty)) { + .general_purpose => if (ret_size <= @as(u4, switch (self.target.cpu.arch) { + else => unreachable, + .x86 => 4, + .x86_64 => 8, + })) + break :return_value .init(.{ .register = registerAlias(ret_gpr[0], ret_size) }) + else if (ret_gpr.len >= 2 and ret_ty.isSliceAtRuntime(zcu)) + break :return_value .init(.{ .register_pair = ret_gpr[0..2].* }), + .segment, .mmx, .ip => unreachable, + .x87 => break :return_value .init(.{ .register = .st0 }), + .sse => if (ret_size <= self.vectorSize(.float)) break :return_value .init(.{ + .register = registerAlias(abi.getCAbiSseReturnRegs(cc)[0], @max(ret_size, 16)), + }), + }; + const ret_indirect_reg = param_gpr[0]; + param_gpr = param_gpr[1..]; + break :return_value .{ + .short = .{ .indirect = .{ .reg = ret_gpr[0] } }, + .long = .{ .indirect = .{ .reg = 
ret_indirect_reg } }, + }; + }; // Input params - for (param_types, result.args) |ty, *arg| { - if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) { + for (param_types, result.args) |param_ty, *arg| { + if (!param_ty.hasRuntimeBitsIgnoreComptime(zcu)) { arg.* = .none; continue; } - const param_align = ty.abiAlignment(zcu); + const param_size: u31 = @intCast(param_ty.abiSize(zcu)); + if (abi.zigcc.params_in_regs) switch (self.regClassForType(param_ty)) { + .general_purpose => if (param_gpr.len >= 1 and param_size <= @as(u4, switch (self.target.cpu.arch) { + else => unreachable, + .x86 => 4, + .x86_64 => 8, + })) { + arg.* = .{ .register = registerAlias(param_gpr[0], param_size) }; + param_gpr = param_gpr[1..]; + continue; + } else if (param_gpr.len >= 2 and param_ty.isSliceAtRuntime(zcu)) { + arg.* = .{ .register_pair = param_gpr[0..2].* }; + param_gpr = param_gpr[2..]; + continue; + }, + .segment, .mmx, .ip => unreachable, + .x87 => if (param_x87.len >= 1) { + arg.* = .{ .register = param_x87[0] }; + param_x87 = param_x87[1..]; + continue; + }, + .sse => if (param_sse.len >= 1 and param_size <= self.vectorSize(.float)) { + arg.* = .{ + .register = registerAlias(param_sse[0], @max(param_size, 16)), + }; + param_sse = param_sse[1..]; + continue; + }, + }; + const param_align = param_ty.abiAlignment(zcu); result.stack_byte_count = @intCast(param_align.forward(result.stack_byte_count)); result.stack_align = result.stack_align.max(param_align); arg.* = .{ .load_frame = .{ .index = stack_frame_base, .off = result.stack_byte_count, } }; - result.stack_byte_count += @intCast(ty.abiSize(zcu)); + result.stack_byte_count += param_size; } }, else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}), diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 10e6dc4618..ca9859bc37 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -1200,8 +1200,12 @@ pub const RegisterList = struct { return @intCast(self.bitset.count()); } - pub fn size(self: Self) i32 { - return @intCast(self.bitset.count() * 8); + pub fn size(self: Self, target: *const std.Target) i32 { + return @intCast(self.bitset.count() * @as(u4, switch (target.cpu.arch) { + else => unreachable, + .x86 => 4, + .x86_64 => 8, + })); } }; diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 3d710c426a..750ea99706 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -408,6 +408,31 @@ fn classifySystemVUnion( return starting_byte_offset + loaded_union.sizeUnordered(ip); } +pub const zigcc = struct { + pub const stack_align: ?InternPool.Alignment = null; + pub const return_in_regs = true; + pub const params_in_regs = true; + + const volatile_gpr = gp_regs.len - 5; + const volatile_x87 = x87_regs.len - 1; + const volatile_sse = sse_avx_regs.len; + + /// Note that .rsp and .rbp also belong to this set, however, we never expect to use them + /// for anything else but stack offset tracking therefore we exclude them from this set. + pub const callee_preserved_regs = gp_regs[volatile_gpr..] ++ x87_regs[volatile_x87 .. x87_regs.len - 1] ++ sse_avx_regs[volatile_sse..]; + /// These registers need to be preserved (saved on the stack) and restored by the caller before + /// the caller relinquishes control to a subroutine via call instruction (or similar). + /// In other words, these registers are free to use by the callee. 
+ pub const caller_preserved_regs = gp_regs[0..volatile_gpr] ++ x87_regs[0..volatile_x87] ++ sse_avx_regs[0..volatile_sse]; + + const int_param_regs = gp_regs[0 .. volatile_gpr - 1]; + const x87_param_regs = x87_regs[0..volatile_x87]; + const sse_param_regs = sse_avx_regs[0..volatile_sse]; + const int_return_regs = gp_regs[0..volatile_gpr]; + const x87_return_regs = x87_regs[0..volatile_x87]; + const sse_return_regs = sse_avx_regs[0..volatile_gpr]; +}; + pub const SysV = struct { /// Note that .rsp and .rbp also belong to this set, however, we never expect to use them /// for anything else but stack offset tracking therefore we exclude them from this set. @@ -418,8 +443,10 @@ pub const SysV = struct { pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8, .r9, .r10, .r11 } ++ x87_regs ++ sse_avx_regs; pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; + pub const c_abi_x87_param_regs = x87_regs[0..0].*; pub const c_abi_sse_param_regs = sse_avx_regs[0..8].*; pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; + pub const c_abi_x87_return_regs = x87_regs[0..2].*; pub const c_abi_sse_return_regs = sse_avx_regs[0..4].*; }; @@ -433,72 +460,90 @@ pub const Win64 = struct { pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .r8, .r9, .r10, .r11 } ++ x87_regs ++ sse_avx_regs; pub const c_abi_int_param_regs = [_]Register{ .rcx, .rdx, .r8, .r9 }; + pub const c_abi_x87_param_regs = x87_regs[0..0].*; pub const c_abi_sse_param_regs = sse_avx_regs[0..4].*; pub const c_abi_int_return_regs = [_]Register{.rax}; + pub const c_abi_x87_return_regs = x87_regs[0..0].*; pub const c_abi_sse_return_regs = sse_avx_regs[0..1].*; }; -pub fn resolveCallingConvention( - cc: std.builtin.CallingConvention, - target: std.Target, -) std.builtin.CallingConvention { - return switch (cc) { - .auto => switch (target.os.tag) { - else => .{ .x86_64_sysv = .{} }, - .windows => .{ .x86_64_win = .{} }, - }, - else => cc, - }; -} - -pub fn getCalleePreservedRegs(cc: std.builtin.CallingConvention) []const Register { +pub fn getCalleePreservedRegs(cc: std.builtin.CallingConvention.Tag) []const Register { return switch (cc) { + .auto => zigcc.callee_preserved_regs, .x86_64_sysv => &SysV.callee_preserved_regs, .x86_64_win => &Win64.callee_preserved_regs, else => unreachable, }; } -pub fn getCallerPreservedRegs(cc: std.builtin.CallingConvention) []const Register { +pub fn getCallerPreservedRegs(cc: std.builtin.CallingConvention.Tag) []const Register { return switch (cc) { + .auto => zigcc.caller_preserved_regs, .x86_64_sysv => &SysV.caller_preserved_regs, .x86_64_win => &Win64.caller_preserved_regs, else => unreachable, }; } -pub fn getCAbiIntParamRegs(cc: std.builtin.CallingConvention) []const Register { +pub fn getCAbiIntParamRegs(cc: std.builtin.CallingConvention.Tag) []const Register { return switch (cc) { + .auto => zigcc.int_param_regs, .x86_64_sysv => &SysV.c_abi_int_param_regs, .x86_64_win => &Win64.c_abi_int_param_regs, else => unreachable, }; } -pub fn getCAbiSseParamRegs(cc: std.builtin.CallingConvention) []const Register { +pub fn getCAbiX87ParamRegs(cc: std.builtin.CallingConvention.Tag) []const Register { return switch (cc) { + .auto => zigcc.x87_param_regs, + .x86_64_sysv => &SysV.c_abi_x87_param_regs, + .x86_64_win => &Win64.c_abi_x87_param_regs, + else => unreachable, + }; +} + +pub fn getCAbiSseParamRegs(cc: std.builtin.CallingConvention.Tag) []const Register { + return switch (cc) { + .auto => zigcc.sse_param_regs, .x86_64_sysv => 
&SysV.c_abi_sse_param_regs, .x86_64_win => &Win64.c_abi_sse_param_regs, else => unreachable, }; } -pub fn getCAbiIntReturnRegs(cc: std.builtin.CallingConvention) []const Register { +pub fn getCAbiIntReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Register { return switch (cc) { + .auto => zigcc.int_return_regs, .x86_64_sysv => &SysV.c_abi_int_return_regs, .x86_64_win => &Win64.c_abi_int_return_regs, else => unreachable, }; } -pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention) []const Register { +pub fn getCAbiX87ReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Register { return switch (cc) { + .auto => zigcc.x87_return_regs, + .x86_64_sysv => &SysV.c_abi_x87_return_regs, + .x86_64_win => &Win64.c_abi_x87_return_regs, + else => unreachable, + }; +} + +pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Register { + return switch (cc) { + .auto => zigcc.sse_return_regs, .x86_64_sysv => &SysV.c_abi_sse_return_regs, .x86_64_win => &Win64.c_abi_sse_return_regs, else => unreachable, }; } +pub fn getCAbiLinkerScratchReg(cc: std.builtin.CallingConvention.Tag) Register { + const int_return_regs = getCAbiIntReturnRegs(cc); + return int_return_regs[int_return_regs.len - 1]; +} + const gp_regs = [_]Register{ .rax, .rdx, .rbx, .rcx, .rsi, .rdi, .r8, .r9, .r10, .r11, .r12, .r13, .r14, .r15, }; From b7acd977896a13d37d3f592627e55d372aeedc6a Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 3 Jan 2025 03:56:43 -0500 Subject: [PATCH 15/25] x86_64: fix hazards exposed by new calling convention --- src/arch/x86_64/CodeGen.zig | 73 ++++++++++----- src/arch/x86_64/Disassembler.zig | 48 +++++++--- src/arch/x86_64/Encoding.zig | 32 +++++-- src/arch/x86_64/encoder.zig | 19 ++-- src/arch/x86_64/encodings.zig | 156 +++++++++++++++---------------- 5 files changed, 196 insertions(+), 132 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 369dfe2d9f..aaf7847384 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -22303,8 +22303,8 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C .off = -dst_reg_off.off, } }, }, opts), - inline .register_pair, .register_triple, .register_quadruple => |dst_regs| { - const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = switch (src_mcv) { + inline .register_pair, .register_triple, .register_quadruple => |dst_regs, dst_tag| { + const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = src_info: switch (src_mcv) { .register => |src_reg| switch (dst_regs[0].class()) { .general_purpose => switch (src_reg.class()) { else => unreachable, @@ -22329,43 +22329,66 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C }, else => unreachable, }, - .register_pair, .memory, .indirect, .load_frame => null, - .load_symbol, .load_direct, .load_got, .load_tlv => src: { + dst_tag => |src_regs| { + var hazard_regs = src_regs; + for (dst_regs, &hazard_regs, 1..) 
|dst_reg, src_reg, hazard_index| { + const dst_id = dst_reg.id(); + if (dst_id == src_reg.id()) continue; + var mir_tag: Mir.Inst.Tag = .mov; + for (hazard_regs[hazard_index..]) |*hazard_reg| { + if (dst_id != hazard_reg.id()) continue; + mir_tag = .xchg; + hazard_reg.* = src_reg; + } + try self.asmRegisterRegister(.{ ._, mir_tag }, dst_reg.to64(), src_reg.to64()); + } + return; + }, + .memory, .indirect, .load_frame => null, + .load_symbol, .load_direct, .load_got, .load_tlv => { const src_addr_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64(); const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); errdefer self.register_manager.unlockReg(src_addr_lock); try self.genSetReg(src_addr_reg, .usize, src_mcv.address(), opts); - break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock }; + break :src_info .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock }; }, - .air_ref => |src_ref| return self.genCopy( - ty, - dst_mcv, - try self.resolveInst(src_ref), - opts, - ), + .air_ref => |src_ref| return self.genCopy(ty, dst_mcv, try self.resolveInst(src_ref), opts), else => return self.fail("TODO implement genCopy for {s} of {}", .{ @tagName(src_mcv), ty.fmt(pt), }), }; defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock); - var part_disp: i32 = 0; - for (dst_regs, try self.splitType(dst_regs.len, ty), 0..) |dst_reg, dst_ty, part_i| { - try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) { - inline .register_pair, - .register_triple, - .register_quadruple, - => |src_regs| .{ .register = src_regs[part_i] }, - .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(), - .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{ - .reg = src_info.?.addr_reg, - .off = part_disp, - } }, + for ([_]bool{ false, true }) |emit_hazard| { + var hazard_count: u3 = 0; + var part_disp: i32 = 0; + for (dst_regs, try self.splitType(dst_regs.len, ty), 0..) 
|dst_reg, dst_ty, part_i| { + defer part_disp += @intCast(dst_ty.abiSize(pt.zcu)); + const is_hazard = if (src_mcv.getReg()) |src_reg| + dst_reg.id() == src_reg.id() + else if (src_info) |info| + dst_reg.id() == info.addr_reg.id() + else + false; + if (is_hazard) hazard_count += 1; + if (is_hazard != emit_hazard) continue; + try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) { + dst_tag => |src_regs| .{ .register = src_regs[part_i] }, + .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(), + .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{ + .reg = src_info.?.addr_reg, + .off = part_disp, + } }, + else => unreachable, + }, opts); + } + switch (hazard_count) { + 0 => break, + 1 => continue, else => unreachable, - }, opts); - part_disp += @intCast(dst_ty.abiSize(pt.zcu)); + } } }, .indirect => |reg_off| try self.genSetMem( diff --git a/src/arch/x86_64/Disassembler.zig b/src/arch/x86_64/Disassembler.zig index 85f4f3edef..e781a6cdc8 100644 --- a/src/arch/x86_64/Disassembler.zig +++ b/src/arch/x86_64/Disassembler.zig @@ -38,8 +38,36 @@ pub fn next(dis: *Disassembler) Error!?Instruction { const enc = try dis.parseEncoding(prefixes) orelse return error.UnknownOpcode; switch (enc.data.op_en) { - .zo => return inst(enc, .{}), - .d, .i => { + .z => return inst(enc, .{}), + .o => { + const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]); + return inst(enc, .{ + .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) }, + }); + }, + .zo => { + const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]); + return inst(enc, .{ + .op1 = .{ .reg = enc.data.ops[0].toReg() }, + .op2 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[1].regBitSize()) }, + }); + }, + .oz => { + const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]); + return inst(enc, .{ + .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) }, + .op2 = .{ .reg = enc.data.ops[1].toReg() }, + }); + }, + .oi => { + const reg_low_enc: u3 = @truncate(dis.code[dis.pos - 1]); + const imm = try dis.parseImm(enc.data.ops[1]); + return inst(enc, .{ + .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) }, + .op2 = .{ .imm = imm }, + }); + }, + .i, .d => { const imm = try dis.parseImm(enc.data.ops[0]); return inst(enc, .{ .op1 = .{ .imm = imm }, @@ -48,20 +76,10 @@ pub fn next(dis: *Disassembler) Error!?Instruction { .zi => { const imm = try dis.parseImm(enc.data.ops[1]); return inst(enc, .{ - .op1 = .{ .reg = Register.rax.toBitSize(enc.data.ops[0].regBitSize()) }, + .op1 = .{ .reg = enc.data.ops[0].toReg() }, .op2 = .{ .imm = imm }, }); }, - .o, .oi => { - const reg_low_enc = @as(u3, @truncate(dis.code[dis.pos - 1])); - const op2: Instruction.Operand = if (enc.data.op_en == .oi) .{ - .imm = try dis.parseImm(enc.data.ops[1]), - } else .none; - return inst(enc, .{ - .op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) }, - .op2 = op2, - }); - }, .m, .mi, .m1, .mc => { const modrm = try dis.parseModRmByte(); const act_enc = Encoding.findByOpcode(enc.opcode(), .{ @@ -118,7 +136,7 @@ pub fn next(dis: *Disassembler) Error!?Instruction { const seg = segmentRegister(prefixes.legacy); const offset = try dis.parseOffset(); return inst(enc, .{ - .op1 = .{ .reg = Register.rax.toBitSize(enc.data.ops[0].regBitSize()) }, + .op1 = .{ .reg = enc.data.ops[0].toReg() }, .op2 = .{ 
.mem = Memory.initMoffs(seg, offset) }, }); }, @@ -127,7 +145,7 @@ pub fn next(dis: *Disassembler) Error!?Instruction { const offset = try dis.parseOffset(); return inst(enc, .{ .op1 = .{ .mem = Memory.initMoffs(seg, offset) }, - .op2 = .{ .reg = Register.rax.toBitSize(enc.data.ops[1].regBitSize()) }, + .op2 = .{ .reg = enc.data.ops[1].toReg() }, }); }, .mr, .mri, .mrc => { diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 142fe4745b..9be0f36eda 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -176,9 +176,14 @@ pub fn format( for (opc) |byte| try writer.print("{x:0>2} ", .{byte}); switch (encoding.data.op_en) { - .zo, .fd, .td, .i, .zi, .d => {}, - .o, .oi => { - const tag = switch (encoding.data.ops[0]) { + .z, .fd, .td, .i, .zi, .d => {}, + .o, .zo, .oz, .oi => { + const op = switch (encoding.data.op_en) { + .o, .oz, .oi => encoding.data.ops[0], + .zo => encoding.data.ops[1], + else => unreachable, + }; + const tag = switch (op) { .r8 => "rb", .r16 => "rw", .r32 => "rd", @@ -213,7 +218,7 @@ pub fn format( try writer.print("{s} ", .{tag}); }, .rvmr => try writer.writeAll("/is4 "), - .zo, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr, .rmv => {}, + .z, .fd, .td, .o, .zo, .oz, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr, .rmv => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); @@ -455,8 +460,8 @@ pub const Mnemonic = enum { pub const OpEn = enum { // zig fmt: off - zo, - o, oi, + z, + o, zo, oz, oi, i, zi, d, m, fd, td, @@ -575,6 +580,21 @@ pub const Op = enum { }; } + pub fn toReg(op: Op) Register { + return switch (op) { + else => .none, + .al => .al, + .ax => .ax, + .eax => .eax, + .rax => .rax, + .cl => .cl, + .rip => .rip, + .eip => .eip, + .ip => .ip, + .xmm0 => .xmm0, + }; + } + pub fn immBitSize(op: Op) u64 { return switch (op) { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index bf0c0c0467..b7449c2146 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -336,7 +336,7 @@ pub const Instruction = struct { .directive => .{ .mnemonic = mnemonic, .data = .{ - .op_en = .zo, + .op_en = .z, .ops = .{ if (ops.len > 0) Encoding.Op.fromOperand(ops[0], target) else .none, if (ops.len > 1) Encoding.Op.fromOperand(ops[1], target) else .none, @@ -401,7 +401,7 @@ pub const Instruction = struct { } switch (data.op_en) { - .zo, .o => {}, + .z, .o, .zo, .oz => {}, .i, .d => try encodeImm(inst.ops[0].imm, data.ops[0], encoder), .zi, .oi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder), .fd => try encoder.imm64(inst.ops[1].mem.moffs.offset), @@ -454,7 +454,8 @@ pub const Instruction = struct { const final = opcode.len - 1; for (opcode[first..final]) |byte| try encoder.opcode_1byte(byte); switch (inst.encoding.data.op_en) { - .o, .oi => try encoder.opcode_withReg(opcode[final], inst.ops[0].reg.lowEnc()), + .o, .oz, .oi => try encoder.opcode_withReg(opcode[final], inst.ops[0].reg.lowEnc()), + .zo => try encoder.opcode_withReg(opcode[final], inst.ops[1].reg.lowEnc()), else => try encoder.opcode_1byte(opcode[final]), } } @@ -480,7 +481,7 @@ pub const Instruction = struct { } const segment_override: ?Register = switch (op_en) { - .zo, .i, .zi, .o, .oi, .d => null, + .z, .i, .zi, .o, .zo, .oz, .oi, .d => null, .fd => inst.ops[1].mem.base().reg, .td => inst.ops[0].mem.base().reg, .rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister()) @@ -516,8 +517,9 @@ pub const Instruction = struct { rex.w = 
inst.encoding.data.mode == .long; switch (op_en) { - .zo, .i, .zi, .fd, .td, .d => {}, - .o, .oi => rex.b = inst.ops[0].reg.isExtended(), + .z, .i, .zi, .fd, .td, .d => {}, + .o, .oz, .oi => rex.b = inst.ops[0].reg.isExtended(), + .zo => rex.b = inst.ops[1].reg.isExtended(), .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .rmv => { const r_op = switch (op_en) { .rm, .rmi, .rm0, .rmv => inst.ops[0], @@ -550,8 +552,9 @@ pub const Instruction = struct { vex.w = inst.encoding.data.mode.isLong(); switch (op_en) { - .zo, .i, .zi, .fd, .td, .d => {}, - .o, .oi => vex.b = inst.ops[0].reg.isExtended(), + .z, .i, .zi, .fd, .td, .d => {}, + .o, .oz, .oi => vex.b = inst.ops[0].reg.isExtended(), + .zo => vex.b = inst.ops[1].reg.isExtended(), .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr, .rmv => { const r_op = switch (op_en) { .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi, .rmv => inst.ops[0], diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index f6f86cd828..fcb500d0bf 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -124,27 +124,27 @@ pub const table = [_]Entry{ .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none, .none }, .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none, .none }, - .{ .cbw, .zo, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none }, - .{ .cwde, .zo, &.{ .o32 }, &.{ 0x98 }, 0, .none, .none }, - .{ .cdqe, .zo, &.{ .o64 }, &.{ 0x98 }, 0, .long, .none }, + .{ .cbw, .z, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none }, + .{ .cwde, .z, &.{ .o32 }, &.{ 0x98 }, 0, .none, .none }, + .{ .cdqe, .z, &.{ .o64 }, &.{ 0x98 }, 0, .long, .none }, - .{ .cwd, .zo, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none }, - .{ .cdq, .zo, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, - .{ .cqo, .zo, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, + .{ .cwd, .z, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none }, + .{ .cdq, .z, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, + .{ .cqo, .z, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, - .{ .clac, .zo, &.{}, &.{ 0x0f, 0x01, 0xca }, 0, .none, .smap }, + .{ .clac, .z, &.{}, &.{ 0x0f, 0x01, 0xca }, 0, .none, .smap }, - .{ .clc, .zo, &.{}, &.{ 0xf8 }, 0, .none, .none }, + .{ .clc, .z, &.{}, &.{ 0xf8 }, 0, .none, .none }, - .{ .cld, .zo, &.{}, &.{ 0xfc }, 0, .none, .none }, + .{ .cld, .z, &.{}, &.{ 0xfc }, 0, .none, .none }, .{ .clflush, .m, &.{ .m8 }, &.{ 0x0f, 0xae }, 7, .none, .none }, - .{ .cli, .zo, &.{}, &.{ 0xfa }, 0, .none, .none }, + .{ .cli, .z, &.{}, &.{ 0xfa }, 0, .none, .none }, - .{ .clts, .zo, &.{}, &.{ 0x0f, 0x06 }, 0, .none, .none }, + .{ .clts, .z, &.{}, &.{ 0x0f, 0x06 }, 0, .none, .none }, - .{ .clui, .zo, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xee }, 0, .none, .uintr }, + .{ .clui, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xee }, 0, .none, .uintr }, .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .cmov }, .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .cmov }, @@ -260,15 +260,15 @@ pub const table = [_]Entry{ .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none, .none }, .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long, .none }, - .{ .cmps, .zo, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none, .none }, - .{ .cmps, .zo, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .short, .none }, - .{ .cmps, .zo, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none, .none }, - .{ .cmps, .zo, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long, .none }, + .{ .cmps, .z, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmps, .z, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmps, .z, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmps, 
.z, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long, .none }, - .{ .cmpsb, .zo, &.{}, &.{ 0xa6 }, 0, .none, .none }, - .{ .cmpsw, .zo, &.{}, &.{ 0xa7 }, 0, .short, .none }, - .{ .cmpsd, .zo, &.{}, &.{ 0xa7 }, 0, .none, .none }, - .{ .cmpsq, .zo, &.{}, &.{ 0xa7 }, 0, .long, .none }, + .{ .cmpsb, .z, &.{}, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmpsw, .z, &.{}, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmpsd, .z, &.{}, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmpsq, .z, &.{}, &.{ 0xa7 }, 0, .long, .none }, .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none, .none }, .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex, .none }, @@ -279,7 +279,7 @@ pub const table = [_]Entry{ .{ .cmpxchg8b, .m, &.{ .m64 }, &.{ 0x0f, 0xc7 }, 1, .none, .none }, .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long, .none }, - .{ .cpuid, .zo, &.{}, &.{ 0x0f, 0xa2 }, 0, .none, .none }, + .{ .cpuid, .z, &.{}, &.{ 0x0f, 0xa2 }, 0, .none, .none }, .{ .dec, .m, &.{ .rm8 }, &.{ 0xfe }, 1, .none, .none }, .{ .dec, .m, &.{ .rm8 }, &.{ 0xfe }, 1, .rex, .none }, @@ -320,7 +320,7 @@ pub const table = [_]Entry{ .{ .inc, .m, &.{ .rm32 }, &.{ 0xff }, 0, .none, .none }, .{ .inc, .m, &.{ .rm64 }, &.{ 0xff }, 0, .long, .none }, - .{ .int3, .zo, &.{}, &.{ 0xcc }, 0, .none, .none }, + .{ .int3, .z, &.{}, &.{ 0xcc }, 0, .none, .none }, .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, @@ -361,23 +361,23 @@ pub const table = [_]Entry{ .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none, .none }, .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long, .none }, - .{ .lfence, .zo, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none }, + .{ .lfence, .z, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none }, - .{ .lods, .zo, &.{ .m8 }, &.{ 0xac }, 0, .none, .none }, - .{ .lods, .zo, &.{ .m16 }, &.{ 0xad }, 0, .short, .none }, - .{ .lods, .zo, &.{ .m32 }, &.{ 0xad }, 0, .none, .none }, - .{ .lods, .zo, &.{ .m64 }, &.{ 0xad }, 0, .long, .none }, + .{ .lods, .z, &.{ .m8 }, &.{ 0xac }, 0, .none, .none }, + .{ .lods, .z, &.{ .m16 }, &.{ 0xad }, 0, .short, .none }, + .{ .lods, .z, &.{ .m32 }, &.{ 0xad }, 0, .none, .none }, + .{ .lods, .z, &.{ .m64 }, &.{ 0xad }, 0, .long, .none }, - .{ .lodsb, .zo, &.{}, &.{ 0xac }, 0, .none, .none }, - .{ .lodsw, .zo, &.{}, &.{ 0xad }, 0, .short, .none }, - .{ .lodsd, .zo, &.{}, &.{ 0xad }, 0, .none, .none }, - .{ .lodsq, .zo, &.{}, &.{ 0xad }, 0, .long, .none }, + .{ .lodsb, .z, &.{}, &.{ 0xac }, 0, .none, .none }, + .{ .lodsw, .z, &.{}, &.{ 0xad }, 0, .short, .none }, + .{ .lodsd, .z, &.{}, &.{ 0xad }, 0, .none, .none }, + .{ .lodsq, .z, &.{}, &.{ 0xad }, 0, .long, .none }, .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt }, .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt }, .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt }, - .{ .mfence, .zo, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, + .{ .mfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none }, .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .rex, .none }, @@ -421,15 +421,15 @@ pub const table = [_]Entry{ .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .movbe }, .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .movbe }, - .{ .movs, .zo, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, - .{ .movs, .zo, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none }, - .{ .movs, .zo, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, 
.none, .none }, - .{ .movs, .zo, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long, .none }, + .{ .movs, .z, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, + .{ .movs, .z, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none }, + .{ .movs, .z, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none, .none }, + .{ .movs, .z, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long, .none }, - .{ .movsb, .zo, &.{}, &.{ 0xa4 }, 0, .none, .none }, - .{ .movsw, .zo, &.{}, &.{ 0xa5 }, 0, .short, .none }, - .{ .movsd, .zo, &.{}, &.{ 0xa5 }, 0, .none, .none }, - .{ .movsq, .zo, &.{}, &.{ 0xa5 }, 0, .long, .none }, + .{ .movsb, .z, &.{}, &.{ 0xa4 }, 0, .none, .none }, + .{ .movsw, .z, &.{}, &.{ 0xa5 }, 0, .short, .none }, + .{ .movsd, .z, &.{}, &.{ 0xa5 }, 0, .none, .none }, + .{ .movsq, .z, &.{}, &.{ 0xa5 }, 0, .long, .none }, .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .short, .none }, .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex_short, .none }, @@ -465,7 +465,7 @@ pub const table = [_]Entry{ .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none, .none }, .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long, .none }, - .{ .nop, .zo, &.{}, &.{ 0x90 }, 0, .none, .none }, + .{ .nop, .z, &.{}, &.{ 0x90 }, 0, .none, .none }, .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none, .none }, .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex, .none }, @@ -496,7 +496,7 @@ pub const table = [_]Entry{ .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none, .none }, .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long, .none }, - .{ .pause, .zo, &.{}, &.{ 0xf3, 0x90 }, 0, .none, .none }, + .{ .pause, .z, &.{}, &.{ 0xf3, 0x90 }, 0, .none, .none }, .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .short, .none }, .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none, .none }, @@ -507,7 +507,7 @@ pub const table = [_]Entry{ .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt }, .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt }, - .{ .popfq, .zo, &.{}, &.{ 0x9d }, 0, .none, .none }, + .{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .none }, .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .short, .none }, .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none, .none }, @@ -517,9 +517,9 @@ pub const table = [_]Entry{ .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .short, .none }, .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none, .none }, - .{ .pushfq, .zo, &.{}, &.{ 0x9c }, 0, .none, .none }, + .{ .pushfq, .z, &.{}, &.{ 0x9c }, 0, .none, .none }, - .{ .ret, .zo, &.{}, &.{ 0xc3 }, 0, .none, .none }, + .{ .ret, .z, &.{}, &.{ 0xc3 }, 0, .none, .none }, .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none, .none }, .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex, .none }, @@ -640,15 +640,15 @@ pub const table = [_]Entry{ .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none, .none }, .{ .sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long, .none }, - .{ .scas, .zo, &.{ .m8 }, &.{ 0xae }, 0, .none, .none }, - .{ .scas, .zo, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none }, - .{ .scas, .zo, &.{ .m32 }, &.{ 0xaf }, 0, .none, .none }, - .{ .scas, .zo, &.{ .m64 }, &.{ 0xaf }, 0, .long, .none }, + .{ .scas, .z, &.{ .m8 }, &.{ 0xae }, 0, .none, .none }, + .{ .scas, .z, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none }, + .{ .scas, .z, &.{ .m32 }, &.{ 0xaf }, 0, .none, .none }, + .{ .scas, .z, &.{ .m64 }, &.{ 0xaf }, 0, .long, .none }, - .{ .scasb, .zo, &.{}, &.{ 0xae }, 0, .none, .none }, - .{ .scasw, .zo, &.{}, &.{ 0xaf }, 0, .short, .none }, - .{ .scasd, .zo, &.{}, &.{ 0xaf }, 0, .none, .none }, - .{ .scasq, .zo, &.{}, &.{ 0xaf }, 0, .long, .none }, + .{ .scasb, 
.z, &.{}, &.{ 0xae }, 0, .none, .none }, + .{ .scasw, .z, &.{}, &.{ 0xaf }, 0, .short, .none }, + .{ .scasd, .z, &.{}, &.{ 0xaf }, 0, .none, .none }, + .{ .scasq, .z, &.{}, &.{ 0xaf }, 0, .long, .none }, .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none }, .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex, .none }, @@ -711,7 +711,7 @@ pub const table = [_]Entry{ .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none, .none }, .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex, .none }, - .{ .sfence, .zo, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none }, + .{ .sfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none }, .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, @@ -759,25 +759,25 @@ pub const table = [_]Entry{ .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none }, - .{ .stac, .zo, &.{}, &.{ 0x0f, 0x01, 0xcb }, 0, .none, .smap }, + .{ .stac, .z, &.{}, &.{ 0x0f, 0x01, 0xcb }, 0, .none, .smap }, - .{ .stc, .zo, &.{}, &.{ 0xf9 }, 0, .none, .none }, + .{ .stc, .z, &.{}, &.{ 0xf9 }, 0, .none, .none }, - .{ .std, .zo, &.{}, &.{ 0xfd }, 0, .none, .none }, + .{ .std, .z, &.{}, &.{ 0xfd }, 0, .none, .none }, - .{ .sti, .zo, &.{}, &.{ 0xfb }, 0, .none, .none }, + .{ .sti, .z, &.{}, &.{ 0xfb }, 0, .none, .none }, - .{ .stui, .zo, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xef }, 0, .none, .uintr }, + .{ .stui, .z, &.{}, &.{ 0xf3, 0x0f, 0x01, 0xef }, 0, .none, .uintr }, - .{ .stos, .zo, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, - .{ .stos, .zo, &.{ .m16 }, &.{ 0xab }, 0, .short, .none }, - .{ .stos, .zo, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, - .{ .stos, .zo, &.{ .m64 }, &.{ 0xab }, 0, .long, .none }, + .{ .stos, .z, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, + .{ .stos, .z, &.{ .m16 }, &.{ 0xab }, 0, .short, .none }, + .{ .stos, .z, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, + .{ .stos, .z, &.{ .m64 }, &.{ 0xab }, 0, .long, .none }, - .{ .stosb, .zo, &.{}, &.{ 0xaa }, 0, .none, .none }, - .{ .stosw, .zo, &.{}, &.{ 0xab }, 0, .short, .none }, - .{ .stosd, .zo, &.{}, &.{ 0xab }, 0, .none, .none }, - .{ .stosq, .zo, &.{}, &.{ 0xab }, 0, .long, .none }, + .{ .stosb, .z, &.{}, &.{ 0xaa }, 0, .none, .none }, + .{ .stosw, .z, &.{}, &.{ 0xab }, 0, .short, .none }, + .{ .stosd, .z, &.{}, &.{ 0xab }, 0, .none, .none }, + .{ .stosq, .z, &.{}, &.{ 0xab }, 0, .long, .none }, .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none, .none }, .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .short, .none }, @@ -802,7 +802,7 @@ pub const table = [_]Entry{ .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none, .none }, .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long, .none }, - .{ .syscall, .zo, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none }, + .{ .syscall, .z, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none }, .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none, .none }, .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .short, .none }, @@ -823,7 +823,7 @@ pub const table = [_]Entry{ .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi }, .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi }, - .{ .ud2, .zo, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none }, + .{ .ud2, .z, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none }, .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none, .none }, .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex, .none }, @@ -831,12 +831,12 @@ pub const table = [_]Entry{ .{ 
.xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none, .none }, .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long, .none }, - .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .short, .none }, - .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .short, .none }, - .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none, .none }, - .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long, .none }, - .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none, .none }, - .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .zo, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .short, .none }, + .{ .xchg, .oz, &.{ .r16, .ax }, &.{ 0x90 }, 0, .short, .none }, + .{ .xchg, .zo, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .zo, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .oz, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .oz, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long, .none }, .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none, .none }, .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex, .none }, .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none, .none }, @@ -848,7 +848,7 @@ pub const table = [_]Entry{ .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none, .none }, .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long, .none }, - .{ .xgetbv, .zo, &.{}, &.{ 0x0f, 0x01, 0xd0 }, 0, .none, .none }, + .{ .xgetbv, .z, &.{}, &.{ 0x0f, 0x01, 0xd0 }, 0, .none, .none }, .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none, .none }, .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .short, .none }, @@ -874,9 +874,9 @@ pub const table = [_]Entry{ .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, // X87 - .{ .fabs, .zo, &.{}, &.{ 0xd9, 0xe1 }, 0, .none, .x87 }, + .{ .fabs, .z, &.{}, &.{ 0xd9, 0xe1 }, 0, .none, .x87 }, - .{ .fchs, .zo, &.{}, &.{ 0xd9, 0xe0 }, 0, .none, .x87 }, + .{ .fchs, .z, &.{}, &.{ 0xd9, 0xe0 }, 0, .none, .x87 }, .{ .ffree, .o, &.{ .st }, &.{ 0xdd, 0xc0 }, 0, .none, .x87 }, From 094ac8c3dc7945c8452931aed28d6e0b3e26769b Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 3 Jan 2025 05:44:03 -0500 Subject: [PATCH 16/25] x86_64: fix f16 miscomp exposed by new calling convention --- src/arch/x86_64/CodeGen.zig | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index aaf7847384..40d62102a8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -17443,21 +17443,22 @@ fn genBinOp( .float => switch (lhs_ty.floatBits(self.target.*)) { 16 => { assert(self.hasFeature(.f16c)); - const tmp_reg = - (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); + const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); + + const tmp_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_w, .insr }, dst_reg, - dst_reg, + lhs_reg, try src_mcv.mem(self, .{ .size = .word }), .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, dst_reg, - dst_reg, + lhs_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? 
else @@ -17885,25 +17886,24 @@ fn genBinOp( .float => switch (lhs_ty.childType(zcu).floatBits(self.target.*)) { 16 => tag: { assert(self.hasFeature(.f16c)); + const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); switch (lhs_ty.vectorLen(zcu)) { 1 => { - const tmp_reg = (try self.register_manager.allocReg( - null, - abi.RegisterClass.sse, - )).to128(); + const tmp_reg = + (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_w, .insr }, dst_reg, - dst_reg, + lhs_reg, try src_mcv.mem(self, .{ .size = .word }), .u(1), ) else try self.asmRegisterRegisterRegister( .{ .vp_, .unpcklwd }, dst_reg, - dst_reg, + lhs_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? else @@ -17941,15 +17941,16 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( + if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( .{ .vp_d, .insr }, dst_reg, + lhs_reg, try src_mcv.mem(self, .{ .size = .dword }), .u(1), ) else try self.asmRegisterRegisterRegister( .{ .v_ps, .unpckl }, dst_reg, - dst_reg, + lhs_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? else @@ -17992,7 +17993,7 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, lhs_reg); if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, tmp_reg, @@ -18035,7 +18036,7 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), lhs_reg); if (src_mcv.isBase()) try self.asmRegisterMemory( .{ .v_ps, .cvtph2 }, tmp_reg, @@ -18196,8 +18197,7 @@ fn genBinOp( switch (mir_tag[1]) { else => if (self.hasFeature(.avx)) { - const lhs_reg = - if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); + const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( mir_tag, dst_reg, From 870443f7fabefa753a1653a275172a4a04fe8c34 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 3 Jan 2025 07:12:01 -0500 Subject: [PATCH 17/25] x86_64: implement passing undefined as a call arg with the new cc --- src/arch/x86_64/CodeGen.zig | 67 +++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 40d62102a8..2f37d5eee5 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4004,10 +4004,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }); try slot.moveTo(inst, cg); }, - .call => try cg.airCall(inst, .auto), - .call_always_tail => try cg.airCall(inst, .always_tail), - .call_never_tail => try cg.airCall(inst, .never_tail), - .call_never_inline => try cg.airCall(inst, .never_inline), + .call => try cg.airCall(inst, .auto, .{ .safety = true }), + .call_always_tail => try cg.airCall(inst, 
.always_tail, .{ .safety = true }), + .call_never_tail => try cg.airCall(inst, .never_tail, .{ .safety = true }), + .call_never_inline => try cg.airCall(inst, .never_inline, .{ .safety = true }), .clz => |air_tag| if (use_old) try cg.airClz(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; @@ -10136,7 +10136,7 @@ fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void { floatCompilerRtAbiName(src_bits), floatCompilerRtAbiName(dst_bits), }) catch unreachable, - } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}); + } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); } const src_mcv = try self.resolveInst(ty_op.operand); @@ -10240,7 +10240,7 @@ fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void { floatCompilerRtAbiName(src_bits), floatCompilerRtAbiName(dst_bits), }) catch unreachable, - } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }}); + } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); } const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); @@ -10969,6 +10969,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { } }, &.{ src_ty, src_ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, + .{}, ); break :result if (signed) switch (tag) { .div_floor => { @@ -10998,6 +10999,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { } }, &.{ src_ty, src_ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, + .{}, ); try self.asmRegisterMemory( .{ ._, .sub }, @@ -11247,7 +11249,7 @@ fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs }, overflow.address(), - }); + }, .{}); const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_mcv.register_pair); defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); @@ -11748,7 +11750,7 @@ fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs }, overflow.address(), - }); + }, .{}); const dst_mcv = try self.allocRegOrMem(inst, false); try self.genSetMem( @@ -14413,7 +14415,7 @@ fn genRoundLibcall(self: *CodeGen, ty: Type, src_mcv: MCValue, mode: RoundMode) }, floatLibcAbiSuffix(ty), }) catch unreachable, - } }, &.{ty}, &.{src_mcv}); + } }, &.{ty}, &.{src_mcv}, .{}); } fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: RoundMode) !void { @@ -14684,7 +14686,7 @@ fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void { floatLibcAbiPrefix(ty), floatLibcAbiSuffix(ty), }) catch unreachable, - } }, &.{ty}, &.{.{ .air_ref = un_op }}); + } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{}); } }, else => {}, @@ -14850,7 +14852,7 @@ fn airUnaryMath(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { }, floatLibcAbiSuffix(ty), }) catch unreachable, - } }, &.{ty}, &.{.{ .air_ref = un_op }}); + } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{}); return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -16689,7 +16691,7 @@ fn genMulDivBinOp( lhs_mcv.address(), rhs_mcv.address(), .{ .immediate = src_info.bits }, - }); + }, .{}); return dst_mcv; }, }, @@ -16913,7 +16915,7 @@ fn genBinOp( .return_type = lhs_ty.toIntern(), .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() }, .callee = callee, - } }, &.{ lhs_ty, rhs_ty }, &.{ .{ .air_ref = lhs_air }, .{ .air_ref = rhs_air } }); + } }, &.{ lhs_ty, rhs_ty }, &.{ .{ .air_ref = lhs_air }, .{ .air_ref = rhs_air } }, .{}); return switch (air_tag) { .mod => result: { const adjusted: MCValue = if (type_needs_libcall) adjusted: { @@ 
-16927,7 +16929,7 @@ fn genBinOp( .callee = std.fmt.bufPrint(&add_callee_buf, "__add{c}f3", .{ floatCompilerRtAbiName(float_bits), }) catch unreachable, - } }, &.{ lhs_ty, rhs_ty }, &.{ result, .{ .air_ref = rhs_air } }); + } }, &.{ lhs_ty, rhs_ty }, &.{ result, .{ .air_ref = rhs_air } }, .{}); } else switch (float_bits) { 16, 32, 64 => adjusted: { const dst_reg = switch (result) { @@ -17026,7 +17028,7 @@ fn genBinOp( .return_type = lhs_ty.toIntern(), .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() }, .callee = callee, - } }, &.{ lhs_ty, rhs_ty }, &.{ adjusted, .{ .air_ref = rhs_air } }); + } }, &.{ lhs_ty, rhs_ty }, &.{ adjusted, .{ .air_ref = rhs_air } }, .{}); }, .div_trunc, .div_floor => try self.genRoundLibcall(lhs_ty, result, .{ .mode = switch (air_tag) { @@ -19305,7 +19307,7 @@ fn airFrameAddress(self: *CodeGen, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ .none, .none, .none }); } -fn airCall(self: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void { +fn airCall(self: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier, opts: CopyOptions) !void { if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{}); const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; @@ -19329,7 +19331,7 @@ fn airCall(self: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif defer allocator.free(arg_vals); for (arg_vals, arg_refs) |*arg_val, arg_ref| arg_val.* = .{ .air_ref = arg_ref }; - const ret = try self.genCall(.{ .air = pl_op.operand }, arg_tys, arg_vals); + const ret = try self.genCall(.{ .air = pl_op.operand }, arg_tys, arg_vals, opts); var bt = self.liveness.iterateBigTomb(inst); try self.feed(&bt, pl_op.operand); @@ -19347,7 +19349,7 @@ fn genCall(self: *CodeGen, info: union(enum) { lib: ?[]const u8 = null, callee: []const u8, }, -}, arg_types: []const Type, args: []const MCValue) !MCValue { +}, arg_types: []const Type, args: []const MCValue, opts: CopyOptions) !MCValue { const pt = self.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -19431,12 +19433,12 @@ fn genCall(self: *CodeGen, info: union(enum) { }, .indirect => |reg_off| { frame_index.* = try self.allocFrameIndex(.initType(arg_ty, zcu)); - try self.genSetMem(.{ .frame = frame_index.* }, 0, arg_ty, src_arg, .{}); + try self.genSetMem(.{ .frame = frame_index.* }, 0, arg_ty, src_arg, opts); try self.register_manager.getReg(reg_off.reg, null); try reg_locks.append(self.register_manager.lockReg(reg_off.reg)); }, .load_frame => { - try self.genCopy(arg_ty, dst_arg, src_arg, .{}); + try self.genCopy(arg_ty, dst_arg, src_arg, opts); try self.freeValue(src_arg); }, .elementwise_regs_then_frame => |regs_frame_addr| { @@ -19513,22 +19515,20 @@ fn genCall(self: *CodeGen, info: union(enum) { switch (dst_arg) { .none, .load_frame => {}, .register => |dst_reg| switch (fn_info.cc) { - else => try self.genSetReg( - registerAlias(dst_reg, @intCast(arg_ty.abiSize(zcu))), - arg_ty, - src_arg, - .{}, - ), + else => try self.genSetReg(registerAlias( + dst_reg, + @intCast(arg_ty.abiSize(zcu)), + ), arg_ty, src_arg, opts), .x86_64_sysv, .x86_64_win => { const promoted_ty = self.promoteInt(arg_ty); const promoted_abi_size: u32 = @intCast(promoted_ty.abiSize(zcu)); const dst_alias = registerAlias(dst_reg, promoted_abi_size); - try self.genSetReg(dst_alias, promoted_ty, src_arg, .{}); + try self.genSetReg(dst_alias, promoted_ty, src_arg, opts); if (promoted_ty.toIntern() != arg_ty.toIntern()) try 
self.truncateRegister(arg_ty, dst_alias); }, }, - .register_pair => try self.genCopy(arg_ty, dst_arg, src_arg, .{}), + .register_pair => try self.genCopy(arg_ty, dst_arg, src_arg, opts), .indirect => |reg_off| try self.genSetReg(reg_off.reg, .usize, .{ .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, }, .{}), @@ -19756,7 +19756,7 @@ fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !v }, floatCompilerRtAbiName(float_bits), }) catch unreachable, - } }, &.{ ty, ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }); + } }, &.{ ty, ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, .{}); try self.genBinOpMir(.{ ._, .@"test" }, .i32, ret, ret); break :result switch (op) { .eq => .e, @@ -22305,6 +22305,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C }, opts), inline .register_pair, .register_triple, .register_quadruple => |dst_regs, dst_tag| { const src_info: ?struct { addr_reg: Register, addr_lock: RegisterLock } = src_info: switch (src_mcv) { + .undef, .memory, .indirect, .load_frame => null, .register => |src_reg| switch (dst_regs[0].class()) { .general_purpose => switch (src_reg.class()) { else => unreachable, @@ -22344,7 +22345,6 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C } return; }, - .memory, .indirect, .load_frame => null, .load_symbol, .load_direct, .load_got, .load_tlv => { const src_addr_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.gp)).to64(); @@ -22375,6 +22375,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C if (is_hazard) hazard_count += 1; if (is_hazard != emit_hazard) continue; try self.genSetReg(dst_reg, dst_ty, switch (src_mcv) { + .undef => if (opts.safety and part_i > 0) .{ .register = dst_regs[0] } else .undef, dst_tag => |src_regs| .{ .register = src_regs[part_i] }, .memory, .indirect, .load_frame => src_mcv.address().offset(part_disp).deref(), .load_symbol, .load_direct, .load_got, .load_tlv => .{ .indirect = .{ @@ -23410,7 +23411,7 @@ fn airFloatFromInt(self: *CodeGen, inst: Air.Inst.Index) !void { intCompilerRtAbiName(src_bits), floatCompilerRtAbiName(dst_bits), }) catch unreachable, - } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}); + } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); } const src_mcv = try self.resolveInst(ty_op.operand); @@ -23490,7 +23491,7 @@ fn airIntFromFloat(self: *CodeGen, inst: Air.Inst.Index) !void { floatCompilerRtAbiName(src_bits), intCompilerRtAbiName(dst_bits), }) catch unreachable, - } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}); + } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); } const src_mcv = try self.resolveInst(ty_op.operand); @@ -26230,7 +26231,7 @@ fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void { }) catch unreachable, } }, &.{ ty, ty, ty }, &.{ .{ .air_ref = extra.lhs }, .{ .air_ref = extra.rhs }, .{ .air_ref = pl_op.operand }, - }); + }, .{}); } var mcvs: [3]MCValue = undefined; From 0d9079f46654e39763bbd66090c85cdc466cc18c Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 3 Jan 2025 19:49:25 -0500 Subject: [PATCH 18/25] x86_64: implement element access --- src/arch/x86_64/CodeGen.zig | 194 +++++++++++++++++++++++++-------- test/behavior/x86_64.zig | 1 + test/behavior/x86_64/build.zig | 1 + test/behavior/x86_64/mem.zig | 31 ++++++ 4 files changed, 179 insertions(+), 48 deletions(-) create mode 100644 test/behavior/x86_64/mem.zig diff --git a/src/arch/x86_64/CodeGen.zig 
b/src/arch/x86_64/CodeGen.zig index 2f37d5eee5..422bb0418f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2492,8 +2492,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .atomic_store_seq_cst => try cg.airAtomicStore(inst, .seq_cst), .array_elem_val => try cg.airArrayElemVal(inst), - .slice_elem_val => try cg.airSliceElemVal(inst), - .ptr_elem_val => try cg.airPtrElemVal(inst), .optional_payload => try cg.airOptionalPayload(inst), .unwrap_errunion_err => try cg.airUnwrapErrUnionErr(inst), @@ -3995,7 +3993,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .load_frame = .{ .index = .ret_addr, } }); - while (try slot.toAnyReg(cg)) {} + while (try slot.toRegClass(true, .general_purpose, cg)) {} try slot.moveTo(inst, cg); }, .frame_addr => if (use_old) try cg.airFrameAddress(inst) else { @@ -9445,7 +9443,111 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ops[0].toOffset(0, cg); try ops[0].moveTo(inst, cg); }, - .slice_elem_ptr, .ptr_elem_ptr => |tag| if (use_old) switch (tag) { + .slice_elem_val, .ptr_elem_val => |air_tag| if (use_old) switch (air_tag) { + else => unreachable, + .slice_elem_val => try cg.airSliceElemVal(inst), + .ptr_elem_val => try cg.airPtrElemVal(inst), + } else { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + switch (air_tag) { + else => unreachable, + .slice_elem_val => try ops[0].toLimb(0, cg), + .ptr_elem_val => {}, + } + var res: [1]Temp = undefined; + const res_ty = cg.typeOfIndex(inst); + cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ + .dst_constraints = .{.{ .int = .byte }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .leai(.byte, .src0, .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .int = .word }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .leasi(.word, .src0, .@"2", .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .int = .dword }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .leasi(.dword, .src0, .@"4", .src1), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .int = .qword }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32 } }, + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .leasi(.qword, .src0, .@"8", .src1), ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => switch (res_ty.abiSize(zcu)) { + 0 => res[0] = try cg.tempFromValue(res_ty, .none), + else => |elem_size| { + while (true) for (&ops) |*op| { + if (try op.toRegClass(true, .general_purpose, cg)) break; + } else break; + const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); + const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); + if (!std.math.isPowerOfTwo(elem_size)) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegisterImmediate( + .{ .i_, .mul 
}, + rhs_reg, + rhs_reg, + .u(elem_size), + ); + try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + }); + } else if (elem_size > 8) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( + .{ ._l, .sh }, + rhs_reg, + .u(std.math.log2_int(u64, elem_size)), + ); + try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + }); + } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ + .size = .qword, + .index = rhs_reg, + .scale = .fromFactor(@intCast(elem_size)), + } }, + }); + res[0] = try ops[0].load(res_ty, cg); + }, + }, + else => |e| return e, + }; + if (ops[0].index != res[0].index) try ops[0].die(cg); + if (ops[1].index != res[0].index) try ops[1].die(cg); + try res[0].moveTo(inst, cg); + }, + .slice_elem_ptr, .ptr_elem_ptr => |air_tag| if (use_old) switch (air_tag) { else => unreachable, .slice_elem_ptr => try cg.airSliceElemPtr(inst), .ptr_elem_ptr => try cg.airPtrElemPtr(inst), @@ -9453,7 +9555,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); - switch (tag) { + switch (air_tag) { else => unreachable, .slice_elem_ptr => try ops[0].toLimb(0, cg), .ptr_elem_ptr => {}, @@ -9463,7 +9565,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const elem_size = dst_ty.childType(zcu).abiSize(zcu); if (elem_size == 0) break :zero_offset; while (true) for (&ops) |*op| { - if (try op.toAnyReg(cg)) break; + if (try op.toRegClass(true, .general_purpose, cg)) break; } else break; const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); @@ -27718,6 +27820,7 @@ const Temp = struct { }, }; const new_temp_index = cg.next_temp_index; + try cg.register_manager.getReg(new_reg, new_temp_index.toIndex()); cg.temp_type[@intFromEnum(new_temp_index)] = ty; try cg.genSetReg(new_reg, ty, val, .{}); new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); @@ -27727,27 +27830,6 @@ const Temp = struct { return true; } - fn toAnyReg(temp: *Temp, cg: *CodeGen) !bool { - const val, const ty = switch (temp.unwrap(cg)) { - .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) }, - .temp => |temp_index| val: { - const temp_tracking = temp_index.tracking(cg); - if (temp_tracking.short == .register) return false; - break :val .{ temp_tracking.short, temp_index.typeOf(cg) }; - }, - }; - const new_temp_index = cg.next_temp_index; - cg.temp_type[@intFromEnum(new_temp_index)] = ty; - const new_reg = - try cg.register_manager.allocReg(new_temp_index.toIndex(), cg.regSetForType(ty)); - try cg.genSetReg(new_reg, ty, val, .{}); - new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); - try temp.die(cg); - cg.next_temp_index = @enumFromInt(@intFromEnum(new_temp_index) + 1); - temp.* = .{ .index = new_temp_index.toIndex() }; - return true; - } - fn toRegClass(temp: *Temp, mut: bool, rc: Register.Class, cg: *CodeGen) !bool { const val = temp.tracking(cg).short; if (!mut or temp.isMut(cg)) switch (val) { @@ -27769,7 +27851,7 @@ const Temp = struct { fn toPair(first_temp: *Temp, second_temp: *Temp, cg: *CodeGen) !void { while (true) for ([_]*Temp{ 
first_temp, second_temp }) |part_temp| { - if (try part_temp.toAnyReg(cg)) break; + if (try part_temp.toRegClass(true, .general_purpose, cg)) break; } else break; const first_temp_tracking = first_temp.unwrap(cg).temp.tracking(cg); const second_temp_tracking = second_temp.unwrap(cg).temp.tracking(cg); @@ -27824,12 +27906,12 @@ const Temp = struct { .load_got, .load_tlv, .load_frame, - => return temp.toAnyReg(cg), + => return temp.toRegClass(true, .general_purpose, cg), .lea_symbol => |sym_off| { const off = sym_off.off; if (off == 0) return false; try temp.toOffset(-off, cg); - while (try temp.toAnyReg(cg)) {} + while (try temp.toRegClass(true, .general_purpose, cg)) {} try temp.toOffset(off, cg); return true; }, @@ -27868,24 +27950,16 @@ const Temp = struct { } fn load(ptr: *Temp, val_ty: Type, cg: *CodeGen) !Temp { - const val_abi_size: u32 = @intCast(val_ty.abiSize(cg.pt.zcu)); const val = try cg.tempAlloc(val_ty); switch (val.tracking(cg).short) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), .register => |val_reg| { while (try ptr.toLea(cg)) {} - switch (val_reg.class()) { - .general_purpose => try cg.asmRegisterMemory( - .{ ._, .mov }, - registerAlias(val_reg, val_abi_size), - try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }), - ), - else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), - } + try cg.genSetReg(val_reg, val_ty, ptr.tracking(cg).short.deref(), .{}); }, .load_frame => |val_frame_addr| { var val_ptr = try cg.tempFromValue(.usize, .{ .lea_frame = val_frame_addr }); - var len = try cg.tempFromValue(.usize, .{ .immediate = val_abi_size }); + var len = try cg.tempFromValue(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); try val_ptr.memcpy(ptr, &len, cg); try val_ptr.die(cg); try len.die(cg); @@ -27908,7 +27982,7 @@ const Temp = struct { ); } else continue :val .{ .register = undefined }, .register => { - while (try ptr.toLea(cg) or try val.toAnyReg(cg)) {} + while (try ptr.toLea(cg) or try val.toRegClass(true, .general_purpose, cg)) {} const val_reg = val.tracking(cg).short.register; switch (val_reg.class()) { .general_purpose => try cg.asmMemoryRegister( @@ -28224,6 +28298,7 @@ const Select = struct { any_int, any_signed_int, any_float, + po2_any, bool_vec: Memory.Size, vec: Memory.Size, signed_int_vec: Memory.Size, @@ -28250,15 +28325,17 @@ const Select = struct { unsigned_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size }, signed_int: Memory.Size, unsigned_int: Memory.Size, + elem_int: Memory.Size, fn accepts(constraint: Constraint, ty: Type, cg: *CodeGen) bool { const zcu = cg.pt.zcu; switch (constraint) { .any => return true, - .any_bool_vec => return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type, + .any_bool_vec => return ty.isVector(zcu) and ty.childType(zcu).toIntern() == .bool_type, .any_int => return ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu) or ty.isAbiInt(zcu), .any_signed_int => return ty.isAbiInt(zcu) and ty.intInfo(zcu).signedness == .signed, - .any_float => return ty.scalarType(zcu).isRuntimeFloat(), + .any_float => return ty.isRuntimeFloat(), + .po2_any => return std.math.isPowerOfTwo(ty.abiSize(zcu)), .bool_vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and size.bitSize(cg.target) >= ty.vectorLen(zcu), .vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() != .bool_type and @@ -28434,6 +28511,12 @@ const Select = struct { const int_info = ty.intInfo(zcu); return int_info.signedness == 
.unsigned and size.bitSize(cg.target) >= int_info.bits; }, + .elem_int => |size| { + const elem_ty = ty.childType(zcu); + if (elem_ty.toIntern() == .bool_type) return true; + if (elem_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); + return elem_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= elem_ty.intInfo(zcu).bits; + }, } } }; @@ -29107,7 +29190,14 @@ const Select = struct { const UnsignedImm = @Type(.{ .int = .{ .signedness = .unsigned, .bits = @typeInfo(SignedImm).int.bits }, }); - return op.imm + @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) { + return switch (op.index.ref) { + else => |ref| switch (ref.deref(s).tracking(s.cg).short) { + else => unreachable, + .immediate => |imm| op.index.scale.toFactor() * @as(i32, @intCast(imm)), + .register => 0, + }, + .none => 0, + } + @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) { .none => 0, .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8), .ptr_bit_size => s.cg.target.ptrBitWidth(), @@ -29120,7 +29210,7 @@ const Select = struct { op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), @divExact(op.base.size.bitSize(s.cg.target), 8), )), - .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), + .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).childType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate( -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), ), @@ -29130,7 +29220,7 @@ const Select = struct { .umax => @bitCast(@as(UnsignedImm, std.math.maxInt(UnsignedImm)) >> @truncate( -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), )), - }); + }) + op.imm; } fn lower(op: Select.Operand, s: *Select) !CodeGen.Operand { @@ -29160,7 +29250,11 @@ const Select = struct { .mod = .{ .rm = .{ .size = op.base.size, .index = switch (op.index.ref) { - else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)), + else => |ref| switch (ref.deref(s).tracking(s.cg).short) { + else => unreachable, + .immediate => .none, + .register => |index_reg| registerAlias(index_reg, @divExact(s.cg.target.ptrBitWidth(), 8)), + }, .none => .none, }, .scale = op.index.scale, @@ -29170,7 +29264,11 @@ const Select = struct { .mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{ .size = op.base.size, .index = switch (op.index.ref) { - else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)), + else => |ref| switch (ref.deref(s).tracking(s.cg).short) { + else => unreachable, + .immediate => .none, + .register => |index_reg| registerAlias(index_reg, @divExact(s.cg.target.ptrBitWidth(), 8)), + }, .none => .none, }, .scale = op.index.scale, diff --git a/test/behavior/x86_64.zig b/test/behavior/x86_64.zig index f72fa79ca5..b055c76518 100644 --- a/test/behavior/x86_64.zig +++ b/test/behavior/x86_64.zig @@ -5,4 +5,5 @@ test { if (builtin.zig_backend != .stage2_x86_64) return error.SkipZigTest; if (builtin.object_format == .coff) return error.SkipZigTest; _ = @import("x86_64/math.zig"); + _ = @import("x86_64/mem.zig"); } diff --git a/test/behavior/x86_64/build.zig b/test/behavior/x86_64/build.zig index dccda7236b..2e579f4b6e 100644 --- a/test/behavior/x86_64/build.zig +++ 
b/test/behavior/x86_64/build.zig @@ -88,6 +88,7 @@ pub fn build(b: *std.Build) void { const cpu = query.serializeCpuAlloc(b.allocator) catch @panic("OOM"); for ([_][]const u8{ "math.zig", + "mem.zig", }) |path| { const test_mod = b.createModule(.{ .root_source_file = b.path(path), diff --git a/test/behavior/x86_64/mem.zig b/test/behavior/x86_64/mem.zig new file mode 100644 index 0000000000..853f960c3d --- /dev/null +++ b/test/behavior/x86_64/mem.zig @@ -0,0 +1,31 @@ +fn access(comptime array: anytype) !void { + var slice: []const @typeInfo(@TypeOf(array)).array.child = undefined; + slice = &array; + inline for (0.., &array) |ct_index, *elem| { + var rt_index: usize = undefined; + rt_index = ct_index; + if (&slice.ptr[ct_index] != elem) return error.Unexpected; + if (&slice[ct_index] != elem) return error.Unexpected; + if (&slice.ptr[rt_index] != elem) return error.Unexpected; + if (&slice[rt_index] != elem) return error.Unexpected; + if (slice.ptr[ct_index] != elem.*) return error.Unexpected; + if (slice[ct_index] != elem.*) return error.Unexpected; + if (slice.ptr[rt_index] != elem.*) return error.Unexpected; + if (slice[rt_index] != elem.*) return error.Unexpected; + } +} +test access { + try access([3]u8{ 0xdb, 0xef, 0xbd }); + try access([3]u16{ 0x340e, 0x3654, 0x88d7 }); + try access([3]u32{ 0xd424c2c0, 0x2d6ac466, 0x5a0cfaba }); + try access([3]u64{ + 0x9327a4f5221666a6, + 0x5c34d3ddd84a8b12, + 0xbae087f39f649260, + }); + try access([3]u128{ + 0x601cf010065444d4d42d5536dd9b95db, + 0xa03f592fcaa22d40af23a0c735531e3c, + 0x5da44907b31602b95c2d93f0b582ceab, + }); +} From 3240adfa16cd09d6664223975776c80990cb55cc Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 5 Jan 2025 01:10:52 -0500 Subject: [PATCH 19/25] x86_64: implement pointer addition and subtraction --- src/arch/x86_64/CodeGen.zig | 462 ++++++++++++++++++++++++++++++----- test/behavior/x86_64/mem.zig | 10 +- 2 files changed, 412 insertions(+), 60 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 422bb0418f..d13c34c633 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2404,8 +2404,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .max, => |air_tag| try cg.airBinOp(inst, air_tag), - .ptr_add, .ptr_sub => |air_tag| try cg.airPtrArithmetic(inst, air_tag), - .shr, .shr_exact => try cg.airShlShrBinOp(inst), .shl, .shl_exact => try cg.airShlShrBinOp(inst), @@ -2524,14 +2522,262 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { => return cg.fail("TODO implement optimized float mode", .{}), .arg => try cg.airDbgArg(inst), + .ptr_add => |air_tag| if (use_old) try cg.airPtrArithmetic(inst, air_tag) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + try ops[0].toSlicePtr(cg); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32 } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, .add_src0_elem_size_times_src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 1 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ }, + } }, + }, .{ + 
.dst_constraints = .{.{ .elem_size_is = 2 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 2 + 1 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 4 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 4 + 1 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .ref = .src1 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .elem_size_is = 8 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .elem_size_is = 8 + 1 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .ref = .src1 }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.po2_elem_size}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_mut_gpr } }, + }, + .dst_temps = .{.{ .ref = .src1 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sh, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ }, + } }, + }, .{ + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .add_src0_elem_size), ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ + @tagName(air_tag), + cg.typeOf(bin_op.lhs).fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), + else => |e| return e, + }; + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); + try res[0].moveTo(inst, cg); + }, + .ptr_sub => |air_tag| if (use_old) try cg.airPtrArithmetic(inst, air_tag) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + try ops[0].toSlicePtr(cg); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32 } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leaa(.none, .src0, 
.sub_src0_elem_size_times_src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 1 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_mut_gpr } }, + }, + .dst_temps = .{.{ .ref = .src1 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .neg, .src1p, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 2 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_mut_gpr } }, + }, + .dst_temps = .{.{ .ref = .src1 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .neg, .src1p, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"2", .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 2 + 1 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"2", .src1), ._, ._ }, + .{ ._, ._, .neg, .dst0p, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 4 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_mut_gpr } }, + }, + .dst_temps = .{.{ .ref = .src1 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .neg, .src1p, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"4", .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .elem_size_is = 4 + 1 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"4", .src1), ._, ._ }, + .{ ._, ._, .neg, .dst0p, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .elem_size_is = 8 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_mut_gpr } }, + }, + .dst_temps = .{.{ .ref = .src1 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .neg, .src1p, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src0, .@"8", .src1), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .elem_size_is = 8 + 1 }}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0p, .leasi(.none, .src1, .@"8", .src1), ._, ._ }, + .{ ._, ._, .neg, .dst0p, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.po2_elem_size}, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_mut_gpr } }, + }, + .dst_temps = .{.{ .ref = .src1 }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._l, .sa, .src1p, .sa(.none, .add_log2_src0_elem_size), ._, ._ }, + .{ ._, ._, .neg, .src1p, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .src1), ._, ._ }, + } }, + }, .{ + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .dst0p, .src1p, .sa(.none, .sub_src0_elem_size), ._ }, + .{ ._, ._, .lea, .dst0p, .leai(.none, .src0, .dst0), ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to 
select {s} {} {} {}", .{ + @tagName(air_tag), + cg.typeOf(bin_op.lhs).fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), + else => |e| return e, + }; + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); + try res[0].moveTo(inst, cg); + }, .alloc => if (use_old) try cg.airAlloc(inst) else { var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{ .index = try cg.allocMemPtr(inst), } }); try slot.moveTo(inst, cg); }, - .inferred_alloc => unreachable, - .inferred_alloc_comptime => unreachable, + .inferred_alloc, .inferred_alloc_comptime => unreachable, .ret_ptr => if (use_old) try cg.airRetPtr(inst) else { var slot = switch (cg.ret_mcv.long) { else => unreachable, @@ -2901,8 +3147,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }), else => |e| return e, }; - if (ops[0].index != res[0].index) try ops[0].die(cg); - if (ops[1].index != res[0].index) try ops[1].die(cg); + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); try res[0].moveTo(inst, cg); }, .not => |air_tag| if (use_old) try cg.airUnOp(inst, air_tag) else { @@ -3953,7 +4200,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }), else => |e| return e, }; - if (ops[0].index != res[0].index) try ops[0].die(cg); + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); try res[0].moveTo(inst, cg); }, @@ -5016,6 +5265,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, @@ -5042,6 +5292,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, @@ -5067,6 +5318,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, @@ -5094,6 +5346,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ }, @@ -5120,6 +5373,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, @@ -5146,6 +5400,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, .{ .@"0:", ._, .lzcnt, .dst0q, .memi(.src0q, .tmp0), ._, ._ }, @@ -5171,6 +5426,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = 
.general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, .{ .@"0:", ._, .xor, .dst0d, .dst0d, ._, ._ }, @@ -5198,6 +5454,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, .{ .@"0:", ._, .mov, .dst0d, .si(-1), ._, ._ }, @@ -5224,6 +5481,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -5253,6 +5511,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -5281,6 +5540,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_size), ._, ._ }, .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, @@ -5309,6 +5569,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -5338,6 +5599,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -5366,6 +5628,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, .{ ._, ._, .mov, .dst0q, .ua(.src0, .add_umax), ._, ._ }, @@ -5394,6 +5657,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, @@ -5419,6 +5683,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, @@ -5444,6 +5709,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, @@ -5469,6 +5735,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, @@ -5494,6 +5761,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, @@ -5519,6 +5787,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, @@ -5544,6 +5813,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -5569,6 +5839,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -5594,6 +5865,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, @@ -5622,6 +5894,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, @@ -5650,6 +5923,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, @@ -5678,6 +5952,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, @@ -5706,6 +5981,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, @@ -5732,6 +6008,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0b, .tmp0, .add_len), ._, ._ }, @@ -5759,6 +6036,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ 
.{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, @@ -5787,6 +6065,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, @@ -5815,6 +6094,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, @@ -5843,6 +6123,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, @@ -5871,6 +6152,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, @@ -5897,6 +6179,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .movzx, .tmp1d, .memsia(.src0w, .@"2", .tmp0, .add_2_len), ._, ._ }, @@ -5924,6 +6207,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, @@ -5952,6 +6236,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, @@ -5980,6 +6265,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, @@ -6008,6 +6294,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, @@ -6036,6 +6323,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, .@"4", .tmp0, .add_4_len), ._, ._ }, @@ -6062,6 +6350,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1d, .memsia(.src0d, 
.@"4", .tmp0, .add_4_len), ._, ._ }, @@ -6089,6 +6378,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, @@ -6117,6 +6407,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .mov, .tmp1d, .si(0xff), ._, ._ }, @@ -6145,6 +6436,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -6173,6 +6465,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -6201,6 +6494,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -6228,6 +6522,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ .@"0:", ._, .mov, .tmp1q, .ua(.src0, .add_umax), ._, ._ }, @@ -6256,6 +6551,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6292,6 +6588,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6327,6 +6624,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6362,6 +6660,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6398,6 +6697,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6433,6 +6733,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, 
._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6468,6 +6769,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6504,6 +6806,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6539,6 +6842,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6574,6 +6878,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6610,6 +6915,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6645,6 +6951,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_len), ._, ._ }, .{ ._, ._, .lea, .tmp1q, .mem(.src0), ._, ._ }, @@ -6672,7 +6979,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }), else => |e| return e, }; - if (ops[0].index != res[0].index) try ops[0].die(cg); + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); try res[0].moveTo(inst, cg); }, @@ -8480,8 +8789,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .gte => unreachable, .gt => unreachable, } - if (ops[0].index != res[0].index) try ops[0].die(cg); - if (ops[1].index != res[0].index) try ops[1].die(cg); + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); try res[0].moveTo(inst, cg); }, @@ -8678,8 +8988,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }), else => |e| return e, }; - if (ops[0].index != res[0].index) try ops[0].die(cg); - if (ops[1].index != res[0].index) try ops[1].die(cg); + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); try res[0].moveTo(inst, cg); }, .cmp_eq, @@ -9155,8 +9466,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }), else => |e| return e, }; - if (ops[0].index != res[0].index) try ops[0].die(cg); - if (ops[1].index != res[0].index) try ops[1].die(cg); + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); try res[0].moveTo(inst, cg); }, @@ -9294,7 +9606,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .int_from_ptr => if (use_old) try cg.airIntFromPtr(inst) else { const un_op = air_datas[@intFromEnum(inst)].un_op; var ops = try cg.tempsFromOperands(inst, .{un_op}); - try ops[0].toLimb(0, cg); + try ops[0].toSlicePtr(cg); try ops[0].moveTo(inst, cg); }, 
.int_from_bool => if (use_old) try cg.airIntFromBool(inst) else { @@ -9422,13 +9734,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .slice_len => if (use_old) try cg.airSliceLen(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toLimb(1, cg); + try ops[0].toSliceLen(cg); try ops[0].moveTo(inst, cg); }, .slice_ptr => if (use_old) try cg.airSlicePtr(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toLimb(0, cg); + try ops[0].toSlicePtr(cg); try ops[0].moveTo(inst, cg); }, .ptr_slice_len_ptr => if (use_old) try cg.airPtrSliceLenPtr(inst) else { @@ -9450,17 +9762,21 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); - switch (air_tag) { - else => unreachable, - .slice_elem_val => try ops[0].toLimb(0, cg), - .ptr_elem_val => {}, - } + try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; const res_ty = cg.typeOfIndex(inst); cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ .dst_constraints = .{.{ .int = .byte }}, .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .leaa(.byte, .src0, .add_src0_elem_size_times_src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .int = .byte }}, + .patterns = &.{ .{ .src = .{ .to_gpr, .to_gpr } }, }, .dst_temps = .{.{ .rc = .general_purpose }}, @@ -9471,6 +9787,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_constraints = .{.{ .int = .word }}, .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .leaa(.word, .src0, .add_src0_elem_size_times_src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .int = .word }}, + .patterns = &.{ .{ .src = .{ .to_gpr, .to_gpr } }, }, .dst_temps = .{.{ .rc = .general_purpose }}, @@ -9481,6 +9805,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_constraints = .{.{ .int = .dword }}, .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .leaa(.dword, .src0, .add_src0_elem_size_times_src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{.{ .int = .dword }}, + .patterns = &.{ .{ .src = .{ .to_gpr, .to_gpr } }, }, .dst_temps = .{.{ .rc = .general_purpose }}, @@ -9488,10 +9820,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .dst0d, .leasi(.dword, .src0, .@"4", .src1), ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null, null, null }, .dst_constraints = .{.{ .int = .qword }}, .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, + }, + .dst_temps = .{.{ .rc = .general_purpose }}, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .leaa(.qword, .src0, .add_src0_elem_size_times_src1), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{.{ .int = .qword }}, + .patterns = &.{ .{ .src = .{ .to_gpr, .to_gpr } }, }, .dst_temps = .{.{ .rc = .general_purpose }}, @@ -9543,8 +9883,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, else => |e| return e, }; - if 
(ops[0].index != res[0].index) try ops[0].die(cg); - if (ops[1].index != res[0].index) try ops[1].die(cg); + for (ops) |op| for (res) |r| { + if (op.index == r.index) break; + } else try op.die(cg); try res[0].moveTo(inst, cg); }, .slice_elem_ptr, .ptr_elem_ptr => |air_tag| if (use_old) switch (air_tag) { @@ -9555,11 +9896,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); - switch (air_tag) { - else => unreachable, - .slice_elem_ptr => try ops[0].toLimb(0, cg), - .ptr_elem_ptr => {}, - } + try ops[0].toSlicePtr(cg); const dst_ty = cg.typeOfIndex(inst); if (dst_ty.ptrInfo(zcu).flags.vector_index == .none) zero_offset: { const elem_size = dst_ty.childType(zcu).abiSize(zcu); @@ -27687,9 +28024,9 @@ const Temp = struct { temp.* = new_temp; } - fn getLimb(temp: Temp, limb_index: u28, cg: *CodeGen) !Temp { + fn getLimb(temp: Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !Temp { const new_temp_index = cg.next_temp_index; - cg.temp_type[@intFromEnum(new_temp_index)] = .usize; + cg.temp_type[@intFromEnum(new_temp_index)] = limb_ty; switch (temp.tracking(cg).short) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), .immediate => |imm| { @@ -27759,7 +28096,7 @@ const Temp = struct { return .{ .index = new_temp_index.toIndex() }; } - fn toLimb(temp: *Temp, limb_index: u28, cg: *CodeGen) !void { + fn toLimb(temp: *Temp, limb_ty: Type, limb_index: u28, cg: *CodeGen) !void { switch (temp.unwrap(cg)) { .ref => {}, .temp => |temp_index| { @@ -27768,7 +28105,7 @@ const Temp = struct { else => {}, .register, .lea_symbol, .lea_frame => { assert(limb_index == 0); - cg.temp_type[@intFromEnum(temp_index)] = .usize; + cg.temp_type[@intFromEnum(temp_index)] = limb_ty; return; }, .register_pair => |regs| { @@ -27780,7 +28117,7 @@ const Temp = struct { for (regs, 0..) 
|reg, reg_index| if (reg_index != limb_index) cg.register_manager.freeReg(reg); temp_tracking.* = .init(.{ .register = regs[limb_index] }); - cg.temp_type[@intFromEnum(temp_index)] = .usize; + cg.temp_type[@intFromEnum(temp_index)] = limb_ty; return; }, .load_symbol => |sym_off| { @@ -27789,7 +28126,7 @@ const Temp = struct { .sym_index = sym_off.sym_index, .off = sym_off.off + @as(u31, limb_index) * 8, } }); - cg.temp_type[@intFromEnum(temp_index)] = .usize; + cg.temp_type[@intFromEnum(temp_index)] = limb_ty; return; }, .load_frame => |frame_addr| if (!frame_addr.index.isNamed()) { @@ -27798,17 +28135,26 @@ const Temp = struct { .index = frame_addr.index, .off = frame_addr.off + @as(u31, limb_index) * 8, } }); - cg.temp_type[@intFromEnum(temp_index)] = .usize; + cg.temp_type[@intFromEnum(temp_index)] = limb_ty; return; }, } }, } - const new_temp = try temp.getLimb(limb_index, cg); + const new_temp = try temp.getLimb(limb_ty, limb_index, cg); try temp.die(cg); temp.* = new_temp; } + fn toSlicePtr(temp: *Temp, cg: *CodeGen) !void { + const temp_ty = temp.typeOf(cg); + if (temp_ty.isSlice(cg.pt.zcu)) try temp.toLimb(temp_ty.slicePtrFieldType(cg.pt.zcu), 0, cg); + } + + fn toSliceLen(temp: *Temp, cg: *CodeGen) !void { + try temp.toLimb(.usize, 1, cg); + } + fn toReg(temp: *Temp, new_reg: Register, cg: *CodeGen) !bool { const val, const ty = val_ty: switch (temp.unwrap(cg)) { .ref => |ref| .{ temp.tracking(cg).short, cg.typeOf(ref) }, @@ -28325,6 +28671,8 @@ const Select = struct { unsigned_or_exact_remainder_int: struct { of: Memory.Size, is: Memory.Size }, signed_int: Memory.Size, unsigned_int: Memory.Size, + elem_size_is: u8, + po2_elem_size, elem_int: Memory.Size, fn accepts(constraint: Constraint, ty: Type, cg: *CodeGen) bool { @@ -28511,8 +28859,10 @@ const Select = struct { const int_info = ty.intInfo(zcu); return int_info.signedness == .unsigned and size.bitSize(cg.target) >= int_info.bits; }, + .elem_size_is => |size| return size == ty.elemType2(zcu).abiSize(zcu), + .po2_elem_size => return std.math.isPowerOfTwo(ty.elemType2(zcu).abiSize(zcu)), .elem_int => |size| { - const elem_ty = ty.childType(zcu); + const elem_ty = ty.elemType2(zcu); if (elem_ty.toIntern() == .bool_type) return true; if (elem_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); return elem_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= elem_ty.intInfo(zcu).bits; @@ -28794,6 +29144,8 @@ const Select = struct { len, elem_limbs, src0_elem_size, + src0_elem_size_times_src1, + log2_src0_elem_size, smin, smax, umax, @@ -28818,7 +29170,13 @@ const Select = struct { const add_len: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .len }; const sub_len: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .len }; const add_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size }; + const add_2_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"2", .amount = .src0_elem_size }; + const add_4_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"4", .amount = .src0_elem_size }; + const add_8_src0_elem_size: Adjust = .{ .factor = 1, .scale = .@"8", .amount = .src0_elem_size }; const sub_src0_elem_size: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size }; + const add_src0_elem_size_times_src1: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .src0_elem_size_times_src1 }; + const sub_src0_elem_size_times_src1: Adjust = .{ .factor = -1, .scale = .@"1", .amount = .src0_elem_size_times_src1 }; + const add_log2_src0_elem_size: Adjust = .{ .factor = 1, 
.scale = .@"1", .amount = .log2_src0_elem_size }; const add_elem_limbs: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .elem_limbs }; const add_umax: Adjust = .{ .factor = 1, .scale = .@"1", .amount = .umax }; }; @@ -29190,14 +29548,7 @@ const Select = struct { const UnsignedImm = @Type(.{ .int = .{ .signedness = .unsigned, .bits = @typeInfo(SignedImm).int.bits }, }); - return switch (op.index.ref) { - else => |ref| switch (ref.deref(s).tracking(s.cg).short) { - else => unreachable, - .immediate => |imm| op.index.scale.toFactor() * @as(i32, @intCast(imm)), - .register => 0, - }, - .none => 0, - } + @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) { + return @as(i5, op.adjust.factor) * op.adjust.scale.toFactor() * @as(SignedImm, switch (op.adjust.amount) { .none => 0, .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8), .ptr_bit_size => s.cg.target.ptrBitWidth(), @@ -29210,7 +29561,10 @@ const Select = struct { op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu), @divExact(op.base.size.bitSize(s.cg.target), 8), )), - .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).childType(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), + .src0_elem_size => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), + .src0_elem_size_times_src1 => @intCast(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) * + Select.Operand.Ref.src1.deref(s).tracking(s.cg).short.immediate), + .log2_src0_elem_size => @intCast(std.math.log2(Select.Operand.Ref.src0.deref(s).typeOf(s.cg).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))), .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate( -%op.base.ref.deref(s).typeOf(s.cg).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), ), @@ -29250,11 +29604,7 @@ const Select = struct { .mod = .{ .rm = .{ .size = op.base.size, .index = switch (op.index.ref) { - else => |ref| switch (ref.deref(s).tracking(s.cg).short) { - else => unreachable, - .immediate => .none, - .register => |index_reg| registerAlias(index_reg, @divExact(s.cg.target.ptrBitWidth(), 8)), - }, + else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)), .none => .none, }, .scale = op.index.scale, @@ -29264,11 +29614,7 @@ const Select = struct { .mem => .{ .mem = try op.base.ref.deref(s).tracking(s.cg).short.mem(s.cg, .{ .size = op.base.size, .index = switch (op.index.ref) { - else => |ref| switch (ref.deref(s).tracking(s.cg).short) { - else => unreachable, - .immediate => .none, - .register => |index_reg| registerAlias(index_reg, @divExact(s.cg.target.ptrBitWidth(), 8)), - }, + else => |ref| registerAlias(ref.deref(s).tracking(s.cg).short.register, @divExact(s.cg.target.ptrBitWidth(), 8)), .none => .none, }, .scale = op.index.scale, diff --git a/test/behavior/x86_64/mem.zig b/test/behavior/x86_64/mem.zig index 853f960c3d..768273f48a 100644 --- a/test/behavior/x86_64/mem.zig +++ b/test/behavior/x86_64/mem.zig @@ -4,13 +4,19 @@ fn access(comptime array: anytype) !void { inline for (0.., &array) |ct_index, *elem| { var rt_index: usize = undefined; rt_index = ct_index; + if (&(slice.ptr + ct_index)[0] != elem) return error.Unexpected; + if (&(slice.ptr + rt_index)[0] != elem) return error.Unexpected; + if (&slice.ptr[ct_index..][0] != elem) return error.Unexpected; + if (&slice.ptr[rt_index..][0] != elem) return error.Unexpected; if (&slice.ptr[ct_index] != elem) return error.Unexpected; - if 
(&slice[ct_index] != elem) return error.Unexpected;
         if (&slice.ptr[rt_index] != elem) return error.Unexpected;
+        if (&slice[ct_index..].ptr[0] != elem) return error.Unexpected;
+        if (&slice[rt_index..].ptr[0] != elem) return error.Unexpected;
+        if (&slice[ct_index] != elem) return error.Unexpected;
         if (&slice[rt_index] != elem) return error.Unexpected;
         if (slice.ptr[ct_index] != elem.*) return error.Unexpected;
-        if (slice[ct_index] != elem.*) return error.Unexpected;
         if (slice.ptr[rt_index] != elem.*) return error.Unexpected;
+        if (slice[ct_index] != elem.*) return error.Unexpected;
         if (slice[rt_index] != elem.*) return error.Unexpected;
     }
 }

From 5069f574f43f1c9fe7ced12f9fd5f05bdedf6abb Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Wed, 8 Jan 2025 06:38:27 -0500
Subject: [PATCH 20/25] x86_64: remove pointless jump to epilogue

---
 src/arch/x86_64/CodeGen.zig | 66 ++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index d13c34c633..169bb0d5e3 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -73,7 +73,7 @@ end_di_column: u32,
 /// The value is an offset into the `Function` `code` from the beginning.
 /// To perform the reloc, write 32-bit signed little-endian integer
 /// which is a relative jump, based on the address following the reloc.
-exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
+epilogue_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
 reused_operands: std.StaticBitSet(Liveness.bpi - 1) = undefined,
 const_tracking: ConstTrackingMap = .empty,
@@ -928,7 +928,7 @@ pub fn generate(
         function.blocks.deinit(gpa);
         function.inst_tracking.deinit(gpa);
         function.const_tracking.deinit(gpa);
-        function.exitlude_jump_relocs.deinit(gpa);
+        function.epilogue_relocs.deinit(gpa);
         function.mir_instructions.deinit(gpa);
         function.mir_extra.deinit(gpa);
         function.mir_table.deinit(gpa);
@@ -2193,24 +2193,24 @@ fn gen(self: *CodeGen) InnerError!void {
     try self.genBody(self.air.getMainBody());
-    // TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
-    // Example:
-    //     pub fn main() void {
-    //         maybeErr() catch return;
-    //         unreachable;
-    //     }
-    // Eliding the reloc will cause a miscompilation in this case.
- for (self.exitlude_jump_relocs.items) |jmp_reloc| { - self.mir_instructions.items(.data)[jmp_reloc].inst.inst = - @intCast(self.mir_instructions.len); - } + const epilogue = if (self.epilogue_relocs.items.len > 0) epilogue: { + const epilogue_relocs_last_index = self.epilogue_relocs.items.len - 1; + for (if (self.epilogue_relocs.items[epilogue_relocs_last_index] == self.mir_instructions.len - 1) epilogue_relocs: { + _ = self.mir_instructions.pop(); + break :epilogue_relocs self.epilogue_relocs.items[0..epilogue_relocs_last_index]; + } else self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc); - try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); - const backpatch_stack_dealloc = try self.asmPlaceholder(); - const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); - try self.asmRegister(.{ ._, .pop }, .rbp); - try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8)); - try self.asmOpOnly(.{ ._, .ret }); + try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); + const backpatch_stack_dealloc = try self.asmPlaceholder(); + const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); + try self.asmRegister(.{ ._, .pop }, .rbp); + try self.asmPseudoRegisterImmediate(.pseudo_cfi_def_cfa_ri_s, .rsp, .s(8)); + try self.asmOpOnly(.{ ._, .ret }); + break :epilogue .{ + .backpatch_stack_dealloc = backpatch_stack_dealloc, + .backpatch_pop_callee_preserved_regs = backpatch_pop_callee_preserved_regs, + }; + } else null; const frame_layout = try self.computeFrameLayout(fn_info.cc); const need_frame_align = frame_layout.stack_mask != std.math.maxInt(u32); @@ -2280,8 +2280,8 @@ fn gen(self: *CodeGen) InnerError!void { }); } } - if (need_frame_align or need_stack_adjust) { - self.mir_instructions.set(backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) { + if (epilogue) |e| if (need_frame_align or need_stack_adjust) { + self.mir_instructions.set(e.backpatch_stack_dealloc, switch (-frame_layout.save_reg_list.size(self.target)) { 0 => .{ .tag = .mov, .ops = .rr, @@ -2305,14 +2305,14 @@ fn gen(self: *CodeGen) InnerError!void { } }, }, }); - } + }; if (need_save_reg) { self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{ .tag = .pseudo, .ops = .pseudo_push_reg_list, .data = .{ .reg_list = frame_layout.save_reg_list }, }); - self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{ + if (epilogue) |e| self.mir_instructions.set(e.backpatch_pop_callee_preserved_regs, .{ .tag = .pseudo, .ops = .pseudo_pop_reg_list, .data = .{ .reg_list = frame_layout.save_reg_list }, @@ -10007,8 +10007,8 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { const ret_reg = param_regs[0]; const enum_mcv = MCValue{ .register = param_regs[1] }; - const exitlude_jump_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu)); - defer self.gpa.free(exitlude_jump_relocs); + const epilogue_relocs = try self.gpa.alloc(Mir.Inst.Index, enum_ty.enumFieldCount(zcu)); + defer self.gpa.free(epilogue_relocs); const data_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); const data_lock = self.register_manager.lockRegAssumeUnused(data_reg); @@ -10017,7 +10017,7 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { var data_off: i32 = 0; const tag_names = enum_ty.enumFields(zcu); - for (exitlude_jump_relocs, 0..) |*exitlude_jump_reloc, tag_index| { + for (epilogue_relocs, 0..) 
|*epilogue_reloc, tag_index| { const tag_name_len = tag_names.get(ip)[tag_index].length(ip); const tag_val = try pt.enumValueFieldIndex(enum_ty, @intCast(tag_index)); const tag_mcv = try self.genTypedValue(tag_val); @@ -10033,7 +10033,7 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { ); try self.genSetMem(.{ .reg = ret_reg }, 8, .usize, .{ .immediate = tag_name_len }, .{}); - exitlude_jump_reloc.* = try self.asmJmpReloc(undefined); + epilogue_reloc.* = try self.asmJmpReloc(undefined); self.performReloc(skip_reloc); data_off += @intCast(tag_name_len + 1); @@ -10041,7 +10041,7 @@ fn genLazy(self: *CodeGen, lazy_sym: link.File.LazySymbol) InnerError!void { try self.asmOpOnly(.{ ._, .ud2 }); - for (exitlude_jump_relocs) |reloc| self.performReloc(reloc); + for (epilogue_relocs) |reloc| self.performReloc(reloc); try self.asmOpOnly(.{ ._, .ret }); }, else => return self.fail( @@ -20114,7 +20114,7 @@ fn airRet(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction // which is available if the jump is 127 bytes or less forward. const jmp_reloc = try self.asmJmpReloc(undefined); - try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc); + try self.epilogue_relocs.append(self.gpa, jmp_reloc); } fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void { @@ -20134,7 +20134,7 @@ fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void { // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction // which is available if the jump is 127 bytes or less forward. const jmp_reloc = try self.asmJmpReloc(undefined); - try self.exitlude_jump_relocs.append(self.gpa, jmp_reloc); + try self.epilogue_relocs.append(self.gpa, jmp_reloc); } fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !void { @@ -24486,7 +24486,7 @@ fn airMemset(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void { self.register_manager.lockRegAssumeUnused(dst_regs[0]), self.register_manager.lockRegAssumeUnused(dst_regs[1]), }, - else => .{ null, null }, + else => @splat(null), }; for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); @@ -24625,7 +24625,7 @@ fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void { self.register_manager.lockRegAssumeUnused(dst_regs[0]), self.register_manager.lockReg(dst_regs[1]), }, - else => .{ null, null }, + else => @splat(null), }; for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); @@ -24636,7 +24636,7 @@ fn airMemcpy(self: *CodeGen, inst: Air.Inst.Index) !void { self.register_manager.lockRegAssumeUnused(src_regs[0]), self.register_manager.lockRegAssumeUnused(src_regs[1]), }, - else => .{ null, null }, + else => @splat(null), }; for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock); From 63730441d01a860a53d190e88079452947a4149d Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Wed, 8 Jan 2025 06:38:47 -0500 Subject: [PATCH 21/25] x86_64: implement union access --- lib/std/debug.zig | 1 + lib/std/debug/NoPanic.zig | 59 ++++++++ src/arch/x86_64/CodeGen.zig | 295 +++++++++++++++++++++++++++--------- test/behavior/union.zig | 2 +- 4 files changed, 286 insertions(+), 71 deletions(-) create mode 100644 lib/std/debug/NoPanic.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 02eb60d6a7..cb294bf660 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -23,6 +23,7 @@ pub const Coverage = 
@import("debug/Coverage.zig"); pub const FormattedPanic = @import("debug/FormattedPanic.zig"); pub const SimplePanic = @import("debug/SimplePanic.zig"); +pub const NoPanic = @import("debug/NoPanic.zig"); /// Unresolved source locations can be represented with a single `usize` that /// corresponds to a virtual memory address of the program counter. Combined diff --git a/lib/std/debug/NoPanic.zig b/lib/std/debug/NoPanic.zig new file mode 100644 index 0000000000..04ae79b8cc --- /dev/null +++ b/lib/std/debug/NoPanic.zig @@ -0,0 +1,59 @@ +//! This namespace can be used with `pub const Panic = std.debug.NoPanic;` in the root file. +//! It emits as little code as possible, for testing purposes. +//! +//! For a functional alternative, see `std.debug.FormattedPanic`. + +const std = @import("../std.zig"); + +pub fn call(_: []const u8, _: ?*std.builtin.StackTrace, _: ?usize) noreturn { + @branchHint(.cold); + @trap(); +} + +pub inline fn sentinelMismatch(_: anytype, _: anytype) noreturn { + @branchHint(.cold); + @trap(); +} + +pub inline fn unwrapError(_: ?*std.builtin.StackTrace, _: anyerror) noreturn { + @branchHint(.cold); + @trap(); +} + +pub inline fn outOfBounds(_: usize, _: usize) noreturn { + @branchHint(.cold); + @trap(); +} + +pub inline fn startGreaterThanEnd(_: usize, _: usize) noreturn { + @branchHint(.cold); + @trap(); +} + +pub inline fn inactiveUnionField(_: anytype, _: anytype) noreturn { + @branchHint(.cold); + @trap(); +} + +pub const messages = struct { + pub const reached_unreachable = ""; + pub const unwrap_null = ""; + pub const cast_to_null = ""; + pub const incorrect_alignment = ""; + pub const invalid_error_code = ""; + pub const cast_truncated_data = ""; + pub const negative_to_unsigned = ""; + pub const integer_overflow = ""; + pub const shl_overflow = ""; + pub const shr_overflow = ""; + pub const divide_by_zero = ""; + pub const exact_division_remainder = ""; + pub const integer_part_out_of_bounds = ""; + pub const corrupt_switch = ""; + pub const shift_rhs_too_big = ""; + pub const invalid_enum_value = ""; + pub const for_len_mismatch = ""; + pub const memcpy_len_mismatch = ""; + pub const memcpy_alias = ""; + pub const noreturn_returned = ""; +}; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 169bb0d5e3..d2e14a7f3e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2467,8 +2467,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .memcpy => try cg.airMemcpy(inst), .memset => try cg.airMemset(inst, false), .memset_safe => try cg.airMemset(inst, true), - .set_union_tag => try cg.airSetUnionTag(inst), - .get_union_tag => try cg.airGetUnionTag(inst), .ctz => try cg.airCtz(inst), .popcount => try cg.airPopCount(inst), .byte_swap => try cg.airByteSwap(inst), @@ -2480,7 +2478,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .shuffle => try cg.airShuffle(inst), .reduce => try cg.airReduce(inst), .aggregate_init => try cg.airAggregateInit(inst), - .union_init => try cg.airUnionInit(inst), .prefetch => try cg.airPrefetch(inst), .mul_add => try cg.airMulAdd(inst), @@ -2528,7 +2525,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, }, @@ 
-2645,7 +2642,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, }, @@ -2772,20 +2769,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try res[0].moveTo(inst, cg); }, .alloc => if (use_old) try cg.airAlloc(inst) else { - var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{ + const ty = air_datas[@intFromEnum(inst)].ty; + var slot = try cg.tempInit(ty, .{ .lea_frame = .{ .index = try cg.allocMemPtr(inst), } }); try slot.moveTo(inst, cg); }, .inferred_alloc, .inferred_alloc_comptime => unreachable, .ret_ptr => if (use_old) try cg.airRetPtr(inst) else { + const ty = air_datas[@intFromEnum(inst)].ty; var slot = switch (cg.ret_mcv.long) { else => unreachable, - .none => try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{ + .none => try cg.tempInit(ty, .{ .lea_frame = .{ .index = try cg.allocMemPtr(inst), } }), .load_frame => slot: { - var slot = try cg.tempFromValue(cg.typeOfIndex(inst), cg.ret_mcv.long); + var slot = try cg.tempInit(ty, cg.ret_mcv.long); try slot.toOffset(cg.ret_mcv.short.indirect.off, cg); break :slot slot; }, @@ -2797,7 +2796,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const bin_op = air_datas[@intFromEnum(inst)].bin_op; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) { + cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) { else => unreachable, .bit_and => .@"and", .bit_or => .@"or", @@ -3156,7 +3155,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{ .src_constraints = .{ .{ .signed_or_exact_int = .byte }, .any }, .patterns = &.{ .{ .src = .{ .mut_mem, .none } }, @@ -4239,14 +4238,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .trap => try cg.asmOpOnly(.{ ._, .ud2 }), .breakpoint => try cg.asmOpOnly(.{ ._, .int3 }), .ret_addr => if (use_old) try cg.airRetAddr(inst) else { - var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .load_frame = .{ + var slot = try cg.tempInit(.usize, .{ .load_frame = .{ .index = .ret_addr, } }); while (try slot.toRegClass(true, .general_purpose, cg)) {} try slot.moveTo(inst, cg); }, .frame_addr => if (use_old) try cg.airFrameAddress(inst) else { - var slot = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .lea_frame = .{ + var slot = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = .base_ptr, } }); try slot.moveTo(inst, cg); @@ -4260,7 +4259,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, comptime &.{ .{ + cg.select(&res, &.{ty_op.ty.toType()}, &ops, comptime &.{ .{ .required_features = .{ 
.slow_incdec, null, null, null }, .src_constraints = .{ .{ .exact_signed_int = 1 }, .any }, .patterns = &.{ @@ -6997,7 +6996,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { switch (extra.compareOperator()) { .lt => unreachable, .lte => unreachable, - .eq, .neq => |cmp_op| cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (cmp_op) { + .eq, .neq => |cmp_op| cg.select(&res, &.{ty_pl.ty.toType()}, &ops, switch (@as(Condition, switch (cmp_op) { else => unreachable, .eq => .e, .neq => .ne, @@ -8825,7 +8824,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unsigned; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (signedness) { + cg.select(&res, &.{.bool}, &ops, switch (@as(Condition, switch (signedness) { .signed => switch (air_tag) { else => unreachable, .cmp_lt, .cmp_lt_optimized => .l, @@ -9011,7 +9010,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOfIndex(inst)}, &ops, switch (@as(Condition, switch (air_tag) { + cg.select(&res, &.{.bool}, &ops, switch (@as(Condition, switch (air_tag) { else => unreachable, .cmp_eq, .cmp_eq_optimized => .e, .cmp_neq, .cmp_neq_optimized => .ne, @@ -9540,7 +9539,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .fromSize(opt_child_abi_size) }), .u(0), ); - var is_null = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .e }); + var is_null = try cg.tempInit(.bool, .{ .eflags = .e }); try ops[0].die(cg); try is_null.moveTo(inst, cg); }, @@ -9563,7 +9562,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .fromSize(opt_child_abi_size) }), .u(0), ); - var is_non_null = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .ne }); + var is_non_null = try cg.tempInit(.bool, .{ .eflags = .ne }); try ops[0].die(cg); try is_non_null.moveTo(inst, cg); }, @@ -9581,7 +9580,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }), .u(0), ); - var is_err = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .ne }); + var is_err = try cg.tempInit(.bool, .{ .eflags = .ne }); try ops[0].die(cg); try is_err.moveTo(inst, cg); }, @@ -9599,7 +9598,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ops[0].tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(eu_err_ty) }), .u(0), ); - var is_non_err = try cg.tempFromValue(cg.typeOfIndex(inst), .{ .eflags = .e }); + var is_non_err = try cg.tempInit(.bool, .{ .eflags = .e }); try ops[0].die(cg); try is_non_err.moveTo(inst, cg); }, @@ -9631,8 +9630,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const opt_child_ty = opt_ty.optionalChild(zcu); const opt_child_abi_size: i32 = @intCast(opt_child_ty.abiSize(zcu)); try ops[0].toOffset(opt_child_abi_size, cg); - var has_value = try cg.tempFromValue(.bool, .{ .immediate = 1 }); - try ops[0].store(&has_value, cg); + var has_value = try cg.tempInit(.bool, .{ .immediate = 1 }); + try ops[0].store(0, &has_value, cg); try has_value.die(cg); try ops[0].toOffset(-opt_child_abi_size, cg); } @@ -9654,7 +9653,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const 
eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(eu_err_off, cg); - var err = try ops[0].load(eu_ty.errorUnionSet(zcu), cg); + var err = try ops[0].load(0, eu_ty.errorUnionSet(zcu), cg); try ops[0].die(cg); try err.moveTo(inst, cg); }, @@ -9667,8 +9666,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const eu_pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(eu_pl_ty, zcu)); var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(eu_err_off, cg); - var no_err = try cg.tempFromValue(eu_err_ty, .{ .immediate = 0 }); - try ops[0].store(&no_err, cg); + var no_err = try cg.tempInit(eu_err_ty, .{ .immediate = 0 }); + try ops[0].store(0, &no_err, cg); try no_err.die(cg); try ops[0].toOffset(eu_pl_off - eu_err_off, cg); try ops[0].moveTo(inst, cg); @@ -9679,7 +9678,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); try ops[0].toOffset(cg.fieldOffset( cg.typeOf(extra.struct_operand), - cg.typeOfIndex(inst), + ty_pl.ty.toType(), extra.field_index, ), cg); try ops[0].moveTo(inst, cg); @@ -9689,7 +9688,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(cg.fieldOffset( cg.typeOf(ty_op.operand), - cg.typeOfIndex(inst), + ty_op.ty.toType(), 0, ), cg); try ops[0].moveTo(inst, cg); @@ -9699,7 +9698,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(cg.fieldOffset( cg.typeOf(ty_op.operand), - cg.typeOfIndex(inst), + ty_op.ty.toType(), 1, ), cg); try ops[0].moveTo(inst, cg); @@ -9709,7 +9708,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(cg.fieldOffset( cg.typeOf(ty_op.operand), - cg.typeOfIndex(inst), + ty_op.ty.toType(), 2, ), cg); try ops[0].moveTo(inst, cg); @@ -9719,11 +9718,29 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(cg.fieldOffset( cg.typeOf(ty_op.operand), - cg.typeOfIndex(inst), + ty_op.ty.toType(), 3, ), cg); try ops[0].moveTo(inst, cg); }, + .set_union_tag => if (use_old) try cg.airSetUnionTag(inst) else { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + const union_ty = cg.typeOf(bin_op.lhs).childType(zcu); + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + const union_layout = union_ty.unionGetLayout(zcu); + if (union_layout.tag_size > 0) try ops[0].store(@intCast(union_layout.tagOffset()), &ops[1], cg); + for (ops) |op| try op.die(cg); + }, + .get_union_tag => if (use_old) try cg.airGetUnionTag(inst) else { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + const union_ty = cg.typeOf(ty_op.operand); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + const union_layout = union_ty.unionGetLayout(zcu); + assert(union_layout.tag_size > 0); + var res = try ops[0].read(@intCast(union_layout.tagOffset()), ty_op.ty.toType(), cg); + for (ops) |op| if (op.index != res.index) try op.die(cg); + try res.moveTo(inst, cg); + }, .slice => if (use_old) try cg.airSlice(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const bin_op = cg.air.extraData(Air.Bin, 
ty_pl.payload).data; @@ -9764,7 +9781,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - const res_ty = cg.typeOfIndex(inst); + const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu); cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ .dst_constraints = .{.{ .int = .byte }}, .patterns = &.{ @@ -9840,7 +9857,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, } }) catch |err| switch (err) { error.SelectFailed => switch (res_ty.abiSize(zcu)) { - 0 => res[0] = try cg.tempFromValue(res_ty, .none), + 0 => res[0] = try cg.tempInit(res_ty, .none), else => |elem_size| { while (true) for (&ops) |*op| { if (try op.toRegClass(true, .general_purpose, cg)) break; @@ -9878,7 +9895,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scale = .fromFactor(@intCast(elem_size)), } }, }); - res[0] = try ops[0].load(res_ty, cg); + res[0] = try ops[0].load(0, res_ty, cg); }, }, else => |e| return e, @@ -9897,7 +9914,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); - const dst_ty = cg.typeOfIndex(inst); + const dst_ty = ty_pl.ty.toType(); if (dst_ty.ptrInfo(zcu).flags.vector_index == .none) zero_offset: { const elem_size = dst_ty.childType(zcu).abiSize(zcu); if (elem_size == 0) break :zero_offset; @@ -9944,19 +9961,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .array_to_slice => if (use_old) try cg.airArrayToSlice(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); - var len = try cg.tempFromValue(.usize, .{ + var len = try cg.tempInit(.usize, .{ .immediate = cg.typeOf(ty_op.operand).childType(zcu).arrayLen(zcu), }); try ops[0].toPair(&len, cg); try ops[0].moveTo(inst, cg); }, .error_set_has_value => return cg.fail("TODO implement error_set_has_value", .{}), + .union_init => if (use_old) try cg.airUnionInit(inst) else { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.UnionInit, ty_pl.payload).data; + const union_ty = ty_pl.ty.toType(); + var ops = try cg.tempsFromOperands(inst, .{extra.init}); + var res = try cg.tempAllocMem(union_ty); + const union_layout = union_ty.unionGetLayout(zcu); + if (union_layout.tag_size > 0) { + var tag_temp = try cg.tempFromValue(try pt.enumValueFieldIndex( + union_ty.unionTagTypeSafety(zcu).?, + extra.field_index, + )); + try res.write(@intCast(union_layout.tagOffset()), &tag_temp, cg); + try tag_temp.die(cg); + } + try res.write(@intCast(union_layout.payloadOffset()), &ops[0], cg); + try ops[0].die(cg); + try res.moveTo(inst, cg); + }, .field_parent_ptr => if (use_old) try cg.airFieldParentPtr(inst) else { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; var ops = try cg.tempsFromOperands(inst, .{extra.field_ptr}); try ops[0].toOffset(-cg.fieldOffset( - cg.typeOfIndex(inst), + ty_pl.ty.toType(), cg.typeOf(extra.field_ptr), extra.field_index, ), cg); @@ -10273,7 +10309,7 @@ fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok }; if (reg_ok) need_mem: { - if (abi_size <= @as(u32, switch (ty.zigTypeTag(zcu)) { + if (std.math.isPowerOfTwo(abi_size) 
and abi_size <= @as(u32, switch (ty.zigTypeTag(zcu)) { .float => switch (ty.floatBits(self.target.*)) { 16, 32, 64, 128 => 16, 80 => break :need_mem, @@ -11042,7 +11078,8 @@ fn airTrunc(self: *CodeGen, inst: Air.Inst.Index) !void { if (src_mcv.getReg()) |reg| self.register_manager.lockRegAssumeUnused(reg) else null; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + const dst_mcv = if (src_mcv.isRegister() and src_mcv.getReg().?.class() == self.regClassForType(dst_ty) and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else if (dst_abi_size <= 8) try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv) @@ -15513,12 +15550,12 @@ fn airLoad(self: *CodeGen, inst: Air.Inst.Index) !void { const ptr_ty = self.typeOf(ty_op.operand); const elem_size = elem_ty.abiSize(zcu); - const elem_rc = self.regSetForType(elem_ty); - const ptr_rc = self.regSetForType(ptr_ty); + const elem_rs = self.regSetForType(elem_ty); + const ptr_rs = self.regSetForType(ptr_ty); const ptr_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and - self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv)) + const dst_mcv = if (elem_size <= 8 and std.math.isPowerOfTwo(elem_size) and + elem_rs.supersetOf(ptr_rs) and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv)) // The MCValue that holds the pointer can be re-used as the value. ptr_mcv else @@ -28295,17 +28332,19 @@ const Temp = struct { return true; } - fn load(ptr: *Temp, val_ty: Type, cg: *CodeGen) !Temp { + fn load(ptr: *Temp, disp: i32, val_ty: Type, cg: *CodeGen) !Temp { const val = try cg.tempAlloc(val_ty); - switch (val.tracking(cg).short) { + const val_mcv = val.tracking(cg).short; + switch (val_mcv) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), .register => |val_reg| { while (try ptr.toLea(cg)) {} - try cg.genSetReg(val_reg, val_ty, ptr.tracking(cg).short.deref(), .{}); + try cg.genSetReg(val_reg, val_ty, ptr.tracking(cg).short.offset(disp).deref(), .{}); }, - .load_frame => |val_frame_addr| { - var val_ptr = try cg.tempFromValue(.usize, .{ .lea_frame = val_frame_addr }); - var len = try cg.tempFromValue(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); + .memory, .indirect, .load_frame, .load_symbol => { + try ptr.toOffset(disp, cg); + var val_ptr = try cg.tempInit(.usize, val_mcv.address()); + var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); try val_ptr.memcpy(ptr, &len, cg); try val_ptr.die(cg); try len.die(cg); @@ -28314,27 +28353,38 @@ const Temp = struct { return val; } - fn store(ptr: *Temp, val: *Temp, cg: *CodeGen) !void { + fn store(ptr: *Temp, disp: i32, val: *Temp, cg: *CodeGen) !void { const val_ty = val.typeOf(cg); - const val_abi_size: u32 = @intCast(val_ty.abiSize(cg.pt.zcu)); val: switch (val.tracking(cg).short) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), - .immediate => |imm| if (std.math.cast(i32, imm)) |s| { + .immediate => |val_imm| { + const val_op: Immediate = if (std.math.cast(u32, val_imm)) |val_uimm32| + .u(val_uimm32) + else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| + .s(val_simm32) + else + continue :val .{ .register = undefined }; while (try ptr.toLea(cg)) {} try cg.asmMemoryImmediate( .{ ._, .mov }, - try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }), - .s(s), + try ptr.tracking(cg).short.deref().mem(cg, .{ + 
.size = cg.memSize(val_ty), + .disp = disp, + }), + val_op, ); - } else continue :val .{ .register = undefined }, + }, .register => { while (try ptr.toLea(cg) or try val.toRegClass(true, .general_purpose, cg)) {} const val_reg = val.tracking(cg).short.register; switch (val_reg.class()) { .general_purpose => try cg.asmMemoryRegister( .{ ._, .mov }, - try ptr.tracking(cg).short.deref().mem(cg, .{ .size = cg.memSize(val_ty) }), - registerAlias(val_reg, val_abi_size), + try ptr.tracking(cg).short.deref().mem(cg, .{ + .size = cg.memSize(val_ty), + .disp = disp, + }), + registerAlias(val_reg, @intCast(val_ty.abiSize(cg.pt.zcu))), ), else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), } @@ -28342,6 +28392,111 @@ const Temp = struct { } } + fn read(src: *Temp, disp: i32, val_ty: Type, cg: *CodeGen) !Temp { + const val = try cg.tempAlloc(val_ty); + while (try src.toBase(cg)) {} + const val_mcv = val.tracking(cg).short; + switch (val_mcv) { + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), + .register => |val_reg| try src.readReg(disp, val_ty, registerAlias( + val_reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + } + return val; + } + + fn readReg(src: Temp, disp: i32, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void { + const strat = try cg.moveStrategy(dst_ty, dst_reg.class(), false); + try strat.read(cg, dst_reg, try src.tracking(cg).short.mem(cg, .{ + .size = .fromBitSize(@min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize())), + .disp = disp, + })); + } + + fn write(dst: *Temp, disp: i32, val: *Temp, cg: *CodeGen) !void { + const val_ty = val.typeOf(cg); + while (try dst.toBase(cg)) {} + val_to_gpr: while (true) : (while (try val.toRegClass(false, .general_purpose, cg)) {}) { + const val_mcv = val.tracking(cg).short; + switch (val_mcv) { + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), + .immediate => |val_imm| { + const val_op: Immediate = if (std.math.cast(u32, val_imm)) |val_uimm32| + .u(val_uimm32) + else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| + .s(val_simm32) + else + continue :val_to_gpr; + try cg.asmMemoryImmediate( + .{ ._, .mov }, + try dst.tracking(cg).short.mem(cg, .{ + .size = cg.memSize(val_ty), + .disp = disp, + }), + val_op, + ); + }, + .register => |val_reg| try dst.writeReg(disp, val_ty, registerAlias( + val_reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + inline .register_pair, .register_triple, .register_quadruple => |val_regs| { + var part_disp = disp; + for (val_regs) |val_reg| { + try dst.writeReg(part_disp, val_ty, val_reg, cg); + part_disp += @divExact(val_reg.bitSize(), 8); + } + }, + .register_offset => |val_reg_off| switch (val_reg_off.off) { + 0 => try dst.writeReg(disp, val_ty, registerAlias( + val_reg_off.reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + else => continue :val_to_gpr, + }, + .lea_frame, .lea_symbol => continue :val_to_gpr, + .memory, .indirect, .load_frame, .load_symbol => { + var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(disp)); + var val_ptr = try cg.tempInit(.usize, val_mcv.address()); + var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); + try dst_ptr.memcpy(&val_ptr, &len, cg); + try dst_ptr.die(cg); + try val_ptr.die(cg); + try len.die(cg); + }, + } + break; + } + } + + fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) !void { + const src_rc = src_reg.class(); + const src_abi_size = src_ty.abiSize(cg.pt.zcu); + const strat = try 
cg.moveStrategy(src_ty, src_rc, false); + if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) { + try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{ + .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())), + .disp = disp, + }), src_reg); + } else { + const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu); + const frame_index = try cg.allocFrameIndex(frame_alloc); + const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size); + try strat.write(cg, .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = frame_size } }, + }, src_reg); + var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address()); + var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); + var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); + try dst_ptr.memcpy(&src_ptr, &len, cg); + try dst_ptr.die(cg); + try src_ptr.die(cg); + try len.die(cg); + } + } + fn memcpy(dst: *Temp, src: *Temp, len: *Temp, cg: *CodeGen) !void { while (true) for ([_]*Temp{ dst, src, len }, [_]Register{ .rdi, .rsi, .rcx }) |temp, reg| { if (try temp.toReg(reg, cg)) break; @@ -28498,7 +28653,7 @@ fn tempAllocMem(cg: *CodeGen, ty: Type) !Temp { return .{ .index = temp_index.toIndex() }; } -fn tempFromValue(cg: *CodeGen, ty: Type, value: MCValue) !Temp { +fn tempInit(cg: *CodeGen, ty: Type, value: MCValue) !Temp { const temp_index = cg.next_temp_index; temp_index.tracking(cg).* = .init(value); cg.temp_type[@intFromEnum(temp_index)] = ty; @@ -28507,6 +28662,10 @@ fn tempFromValue(cg: *CodeGen, ty: Type, value: MCValue) !Temp { return .{ .index = temp_index.toIndex() }; } +fn tempFromValue(cg: *CodeGen, value: Value) !Temp { + return cg.tempInit(value.typeOf(cg.pt.zcu), try cg.genTypedValue(value)); +} + fn tempFromOperand( cg: *CodeGen, inst: Air.Inst.Index, @@ -28549,7 +28708,7 @@ fn tempFromOperand( else => break :init const_mcv, } }); - return cg.tempFromValue(.fromInterned(ip.typeOf(val)), gop.value_ptr.short); + return cg.tempInit(.fromInterned(ip.typeOf(val)), gop.value_ptr.short); } const temp_index = cg.next_temp_index; @@ -29023,8 +29182,8 @@ const Select = struct { return switch (spec.kind) { .unused => null, .any => try cg.tempAlloc(spec.type), - .cc => |cc| try cg.tempFromValue(spec.type, .{ .eflags = cc }), - .reg => |reg| try cg.tempFromValue(spec.type, .{ .register = reg }), + .cc => |cc| try cg.tempInit(spec.type, .{ .eflags = cc }), + .reg => |reg| try cg.tempInit(spec.type, .{ .register = reg }), .rc => |rc| try cg.tempAllocReg(spec.type, regSetForRegClass(rc)), .rc_mask => |rc_mask| try cg.tempAllocReg(spec.type, regSetForRegClass(rc_mask.rc)), .mem => try cg.tempAllocMem(spec.type), @@ -29081,18 +29240,14 @@ const Select = struct { break :res_scalar .{ scalar_int_ty, try pt.intValue_big(scalar_int_ty, big_int.toConst()) }; }, }; - const res_ty, const res_val: Value = if (vector_len) |len| res: { - const vector_ty = try pt.vectorType(.{ + const res_val: Value = if (vector_len) |len| .fromInterned(try pt.intern(.{ .aggregate = .{ + .ty = (try pt.vectorType(.{ .len = len, .child = res_scalar_ty.toIntern(), - }); - const vector_val = try pt.intern(.{ .aggregate = .{ - .ty = vector_ty.toIntern(), - .storage = .{ .repeated_elem = res_scalar_val.toIntern() }, - } }); - break :res .{ vector_ty, .fromInterned(vector_val) }; - } else .{ res_scalar_ty, res_scalar_val }; - return try cg.tempFromValue(res_ty, try cg.genTypedValue(res_val)); + })).toIntern(), + .storage = .{ .repeated_elem = res_scalar_val.toIntern() }, + } })) else res_scalar_val; 
+ return try cg.tempFromValue(res_val); }, .ref => |ref| ref.deref(s), .ref_mask => |ref_mask| ref_mask.ref.deref(s), diff --git a/test/behavior/union.zig b/test/behavior/union.zig index 6e8c895b1a..ef269c2688 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -2246,12 +2246,12 @@ test "matching captures causes union equivalence" { } test "signed enum tag with negative value" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; const Enum = enum(i8) { a = -1, From 6d1fc0f51cf835e8ed503a2089450b1f280d1216 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 9 Jan 2025 06:01:28 -0500 Subject: [PATCH 22/25] x86_64: implement aggregate access --- src/arch/x86_64/CodeGen.zig | 86 +++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 19 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d2e14a7f3e..cb168628a7 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2457,7 +2457,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .load => try cg.airLoad(inst), .store => try cg.airStore(inst, false), .store_safe => try cg.airStore(inst, true), - .struct_field_val => try cg.airStructFieldVal(inst), .float_from_int => try cg.airFloatFromInt(inst), .int_from_float => try cg.airIntFromFloat(inst), .cmpxchg_strong => try cg.airCmpxchg(inst), @@ -9723,11 +9722,32 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ), cg); try ops[0].moveTo(inst, cg); }, + .struct_field_val => if (use_old) try cg.airStructFieldVal(inst) else fallback: { + const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.StructField, ty_pl.payload).data; + const agg_ty = cg.typeOf(extra.struct_operand); + const field_ty = ty_pl.ty.toType(); + const field_off: u31 = switch (agg_ty.containerLayout(zcu)) { + .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(extra.field_index, zcu)), + .@"packed" => break :fallback try cg.airStructFieldVal(inst), + }; + if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); + var res = try ops[0].read(field_off, field_ty, cg); + for (ops) |op| if (op.index != res.index) try op.die(cg); + try res.moveTo(inst, cg); + } else { + // hack around Sema OPV bugs + const res = try cg.tempInit(field_ty, .none); + try res.moveTo(inst, cg); + } + }, .set_union_tag => if (use_old) try cg.airSetUnionTag(inst) else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; const union_ty = cg.typeOf(bin_op.lhs).childType(zcu); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); const union_layout = union_ty.unionGetLayout(zcu); + // hack around Sema OPV bugs if (union_layout.tag_size > 0) try ops[0].store(@intCast(union_layout.tagOffset()), &ops[1], cg); for (ops) |op| try op.die(cg); }, @@ -9857,6 +9877,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, } }) catch |err| switch (err) { error.SelectFailed 
=> switch (res_ty.abiSize(zcu)) { + // hack around Sema OPV bugs 0 => res[0] = try cg.tempInit(res_ty, .none), else => |elem_size| { while (true) for (&ops) |*op| { @@ -9917,6 +9938,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const dst_ty = ty_pl.ty.toType(); if (dst_ty.ptrInfo(zcu).flags.vector_index == .none) zero_offset: { const elem_size = dst_ty.childType(zcu).abiSize(zcu); + // hack around Sema OPV bugs if (elem_size == 0) break :zero_offset; while (true) for (&ops) |*op| { if (try op.toRegClass(true, .general_purpose, cg)) break; @@ -15804,7 +15826,7 @@ fn fieldOffset(self: *CodeGen, ptr_agg_ty: Type, ptr_field_ty: Type, field_index return switch (agg_ty.containerLayout(zcu)) { .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(field_index, zcu)), .@"packed" => @divExact(@as(i32, ptr_agg_ty.ptrInfo(zcu).packed_offset.bit_offset) + - (if (zcu.typeToStruct(agg_ty)) |struct_obj| pt.structPackedFieldBitOffset(struct_obj, field_index) else 0) - + (if (zcu.typeToStruct(agg_ty)) |loaded_struct| pt.structPackedFieldBitOffset(loaded_struct, field_index) else 0) - ptr_field_ty.ptrInfo(zcu).packed_offset.bit_offset, 8), }; } @@ -15828,8 +15850,8 @@ fn airStructFieldVal(self: *CodeGen, inst: Air.Inst.Index) !void { const src_mcv = try self.resolveInst(operand); const field_off: u32 = switch (container_ty.containerLayout(zcu)) { .auto, .@"extern" => @intCast(container_ty.structFieldOffset(extra.field_index, zcu) * 8), - .@"packed" => if (zcu.typeToStruct(container_ty)) |struct_obj| - pt.structPackedFieldBitOffset(struct_obj, extra.field_index) + .@"packed" => if (zcu.typeToStruct(container_ty)) |loaded_struct| + pt.structPackedFieldBitOffset(loaded_struct, extra.field_index) else 0, }; @@ -26448,7 +26470,7 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { .@"struct" => { const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); if (result_ty.containerLayout(zcu) == .@"packed") { - const struct_obj = zcu.typeToStruct(result_ty).?; + const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern()); try self.genInlineMemset( .{ .lea_frame = .{ .index = frame_index } }, .{ .immediate = 0 }, @@ -26469,7 +26491,7 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { } const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu)); const elem_abi_bits = elem_abi_size * 8; - const elem_off = pt.structPackedFieldBitOffset(struct_obj, elem_i); + const elem_off = pt.structPackedFieldBitOffset(loaded_struct, elem_i); const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size); const elem_bit_off = elem_off % elem_abi_bits; const elem_mcv = try self.resolveInst(elem); @@ -26651,9 +26673,9 @@ fn airUnionInit(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, false); - const union_obj = zcu.typeToUnion(union_ty).?; - const field_name = union_obj.loadTagType(ip).names.get(ip)[extra.field_index]; - const tag_ty: Type = .fromInterned(union_obj.enum_tag_ty); + const loaded_union = zcu.typeToUnion(union_ty).?; + const field_name = loaded_union.loadTagType(ip).names.get(ip)[extra.field_index]; + const tag_ty: Type = .fromInterned(loaded_union.enum_tag_ty); const field_index = tag_ty.enumFieldIndex(field_name, zcu).?; const tag_val = try pt.enumValueFieldIndex(tag_ty, field_index); const tag_int_val = try tag_val.intFromEnum(tag_ty, pt); @@ -28393,17 +28415,43 @@ const Temp = struct { } fn read(src: *Temp, disp: i32, val_ty: Type, cg: *CodeGen) !Temp { - const val = 
try cg.tempAlloc(val_ty); + var val = try cg.tempAlloc(val_ty); while (try src.toBase(cg)) {} - const val_mcv = val.tracking(cg).short; - switch (val_mcv) { - else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), - .register => |val_reg| try src.readReg(disp, val_ty, registerAlias( - val_reg, - @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), + val_to_gpr: while (true) : (while (try val.toRegClass(false, .general_purpose, cg)) {}) { + const val_mcv = val.tracking(cg).short; + switch (val_mcv) { + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), + .register => |val_reg| try src.readReg(disp, val_ty, registerAlias( + val_reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + inline .register_pair, .register_triple, .register_quadruple => |val_regs| { + var part_disp = disp; + for (val_regs) |val_reg| { + try src.readReg(disp, val_ty, val_reg, cg); + part_disp += @divExact(val_reg.bitSize(), 8); + } + }, + .register_offset => |val_reg_off| switch (val_reg_off.off) { + 0 => try src.readReg(disp, val_ty, registerAlias( + val_reg_off.reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + else => continue :val_to_gpr, + }, + .lea_frame, .lea_symbol => continue :val_to_gpr, + .memory, .indirect, .load_frame, .load_symbol => { + var val_ptr = try cg.tempInit(.usize, val_mcv.address()); + var src_ptr = try cg.tempInit(.usize, src.tracking(cg).short.address().offset(disp)); + var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); + try val_ptr.memcpy(&src_ptr, &len, cg); + try val_ptr.die(cg); + try src_ptr.die(cg); + try len.die(cg); + }, + } + return val; } - return val; } fn readReg(src: Temp, disp: i32, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void { @@ -28466,7 +28514,7 @@ const Temp = struct { try len.die(cg); }, } - break; + return; } } From 666d76d85c1dc2f107d0a57b424983082672943e Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 9 Jan 2025 09:48:29 -0500 Subject: [PATCH 23/25] x86_64: implement load and store --- src/arch/x86_64/CodeGen.zig | 497 ++++++++++++++++++++++++------------ test/behavior/basic.zig | 2 +- 2 files changed, 338 insertions(+), 161 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index cb168628a7..fcd2e4087c 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -257,6 +257,7 @@ pub const MCValue = union(enum) { }; } + // hack around linker relocation bugs fn isBase(mcv: MCValue) bool { return switch (mcv) { .memory, .indirect, .load_frame => true, @@ -2398,8 +2399,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .add_wrap, .sub, .sub_wrap, - .bool_and, - .bool_or, .min, .max, => |air_tag| try cg.airBinOp(inst, air_tag), @@ -2454,9 +2453,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .is_null => try cg.airIsNull(inst), .is_non_err => try cg.airIsNonErr(inst), .is_err => try cg.airIsErr(inst), - .load => try cg.airLoad(inst), - .store => try cg.airStore(inst, false), - .store_safe => try cg.airStore(inst, true), .float_from_int => try cg.airFloatFromInt(inst), .int_from_float => try cg.airIntFromFloat(inst), .cmpxchg_strong => try cg.airCmpxchg(inst), @@ -2791,14 +2787,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try slot.moveTo(inst, cg); }, .assembly => try cg.airAsm(inst), - .bit_and, .bit_or, .xor => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else { + .bit_and, .bit_or, .xor, .bool_and, .bool_or => |air_tag| if (use_old) try 
cg.airBinOp(inst, air_tag) else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, switch (@as(Mir.Inst.Tag, switch (air_tag) { else => unreachable, - .bit_and => .@"and", - .bit_or => .@"or", + .bit_and, .bool_and => .@"and", + .bit_or, .bool_or => .@"or", .xor => .xor, })) { else => unreachable, @@ -9601,6 +9597,25 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ops[0].die(cg); try is_non_err.moveTo(inst, cg); }, + .load => if (use_old) try cg.airLoad(inst) else fallback: { + const ty_op = air_datas[@intFromEnum(inst)].ty_op; + const val_ty = ty_op.ty.toType(); + const ptr_ty = cg.typeOf(ty_op.operand); + const ptr_info = ptr_ty.ptrInfo(zcu); + if (ptr_info.packed_offset.host_size > 0 and + (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type)) + break :fallback try cg.airLoad(inst); + var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); + var res = try ops[0].load(val_ty, .{ + .disp = switch (ptr_info.flags.vector_index) { + .none => 0, + .runtime => unreachable, + else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)), + }, + }, cg); + for (ops) |op| if (op.index != res.index) try op.die(cg); + try res.moveTo(inst, cg); + }, .int_from_ptr => if (use_old) try cg.airIntFromPtr(inst) else { const un_op = air_datas[@intFromEnum(inst)].un_op; var ops = try cg.tempsFromOperands(inst, .{un_op}); @@ -9615,6 +9630,37 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .ret => try cg.airRet(inst, false), .ret_safe => try cg.airRet(inst, true), .ret_load => try cg.airRetLoad(inst), + .store, .store_safe => |air_tag| if (use_old) try cg.airStore(inst, switch (air_tag) { + else => unreachable, + .store => false, + .store_safe => true, + }) else fallback: { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + const ptr_ty = cg.typeOf(bin_op.lhs); + const ptr_info = ptr_ty.ptrInfo(zcu); + const val_ty = cg.typeOf(bin_op.rhs); + if (ptr_info.packed_offset.host_size > 0 and + (ptr_info.flags.vector_index == .none or val_ty.toIntern() == .bool_type)) + break :fallback try cg.airStore(inst, switch (air_tag) { + else => unreachable, + .store => false, + .store_safe => true, + }); + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + try ops[0].store(&ops[1], .{ + .disp = switch (ptr_info.flags.vector_index) { + .none => 0, + .runtime => unreachable, + else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)), + }, + .safe = switch (air_tag) { + else => unreachable, + .store => false, + .store_safe => true, + }, + }, cg); + for (ops) |op| try op.die(cg); + }, .unreach => {}, .optional_payload_ptr => if (use_old) try cg.airOptionalPayloadPtr(inst) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; @@ -9630,7 +9676,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const opt_child_abi_size: i32 = @intCast(opt_child_ty.abiSize(zcu)); try ops[0].toOffset(opt_child_abi_size, cg); var has_value = try cg.tempInit(.bool, .{ .immediate = 1 }); - try ops[0].store(0, &has_value, cg); + try ops[0].store(&has_value, .{}, cg); try has_value.die(cg); try ops[0].toOffset(-opt_child_abi_size, cg); } @@ -9652,7 +9698,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const eu_err_off: i32 = @intCast(codegen.errUnionErrorOffset(eu_pl_ty, zcu)); var ops = try 
cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(eu_err_off, cg); - var err = try ops[0].load(0, eu_ty.errorUnionSet(zcu), cg); + var err = try ops[0].load(eu_ty.errorUnionSet(zcu), .{}, cg); try ops[0].die(cg); try err.moveTo(inst, cg); }, @@ -9666,7 +9712,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(eu_err_off, cg); var no_err = try cg.tempInit(eu_err_ty, .{ .immediate = 0 }); - try ops[0].store(0, &no_err, cg); + try ops[0].store(&no_err, .{}, cg); try no_err.die(cg); try ops[0].toOffset(eu_pl_off - eu_err_off, cg); try ops[0].moveTo(inst, cg); @@ -9682,43 +9728,29 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ), cg); try ops[0].moveTo(inst, cg); }, - .struct_field_ptr_index_0 => if (use_old) try cg.airStructFieldPtrIndex(inst, 0) else { + .struct_field_ptr_index_0, + .struct_field_ptr_index_1, + .struct_field_ptr_index_2, + .struct_field_ptr_index_3, + => |air_tag| if (use_old) try cg.airStructFieldPtrIndex(inst, switch (air_tag) { + else => unreachable, + .struct_field_ptr_index_0 => 0, + .struct_field_ptr_index_1 => 1, + .struct_field_ptr_index_2 => 2, + .struct_field_ptr_index_3 => 3, + }) else { const ty_op = air_datas[@intFromEnum(inst)].ty_op; var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); try ops[0].toOffset(cg.fieldOffset( cg.typeOf(ty_op.operand), ty_op.ty.toType(), - 0, - ), cg); - try ops[0].moveTo(inst, cg); - }, - .struct_field_ptr_index_1 => if (use_old) try cg.airStructFieldPtrIndex(inst, 1) else { - const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(cg.fieldOffset( - cg.typeOf(ty_op.operand), - ty_op.ty.toType(), - 1, - ), cg); - try ops[0].moveTo(inst, cg); - }, - .struct_field_ptr_index_2 => if (use_old) try cg.airStructFieldPtrIndex(inst, 2) else { - const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(cg.fieldOffset( - cg.typeOf(ty_op.operand), - ty_op.ty.toType(), - 2, - ), cg); - try ops[0].moveTo(inst, cg); - }, - .struct_field_ptr_index_3 => if (use_old) try cg.airStructFieldPtrIndex(inst, 3) else { - const ty_op = air_datas[@intFromEnum(inst)].ty_op; - var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); - try ops[0].toOffset(cg.fieldOffset( - cg.typeOf(ty_op.operand), - ty_op.ty.toType(), - 3, + switch (air_tag) { + else => unreachable, + .struct_field_ptr_index_0 => 0, + .struct_field_ptr_index_1 => 1, + .struct_field_ptr_index_2 => 2, + .struct_field_ptr_index_3 => 3, + }, ), cg); try ops[0].moveTo(inst, cg); }, @@ -9733,7 +9765,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }; if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) { var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); - var res = try ops[0].read(field_off, field_ty, cg); + var res = try ops[0].read(field_ty, .{ .disp = field_off }, cg); for (ops) |op| if (op.index != res.index) try op.die(cg); try res.moveTo(inst, cg); } else { @@ -9748,7 +9780,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); const union_layout = union_ty.unionGetLayout(zcu); // hack around Sema OPV bugs - if (union_layout.tag_size > 0) try ops[0].store(@intCast(union_layout.tagOffset()), &ops[1], cg); + if (union_layout.tag_size > 0) try 
ops[0].store(&ops[1], .{ + .disp = @intCast(union_layout.tagOffset()), + }, cg); for (ops) |op| try op.die(cg); }, .get_union_tag => if (use_old) try cg.airGetUnionTag(inst) else { @@ -9757,7 +9791,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ty_op.operand}); const union_layout = union_ty.unionGetLayout(zcu); assert(union_layout.tag_size > 0); - var res = try ops[0].read(@intCast(union_layout.tagOffset()), ty_op.ty.toType(), cg); + var res = try ops[0].read(ty_op.ty.toType(), .{ + .disp = @intCast(union_layout.tagOffset()), + }, cg); for (ops) |op| if (op.index != res.index) try op.die(cg); try res.moveTo(inst, cg); }, @@ -9916,7 +9952,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .scale = .fromFactor(@intCast(elem_size)), } }, }); - res[0] = try ops[0].load(0, res_ty, cg); + res[0] = try ops[0].load(res_ty, .{}, cg); }, }, else => |e| return e, @@ -10002,10 +10038,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { union_ty.unionTagTypeSafety(zcu).?, extra.field_index, )); - try res.write(@intCast(union_layout.tagOffset()), &tag_temp, cg); + try res.write(&tag_temp, .{ + .disp = @intCast(union_layout.tagOffset()), + }, cg); try tag_temp.die(cg); } - try res.write(@intCast(union_layout.payloadOffset()), &ops[0], cg); + try res.write(&ops[0], .{ + .disp = @intCast(union_layout.payloadOffset()), + }, cg); try ops[0].die(cg); try res.moveTo(inst, cg); }, @@ -28338,6 +28378,7 @@ const Temp = struct { return true; } + // hack around linker relocation bugs fn toBase(temp: *Temp, cg: *CodeGen) !bool { const temp_tracking = temp.tracking(cg); if (temp_tracking.short.isBase()) return false; @@ -28354,17 +28395,38 @@ const Temp = struct { return true; } - fn load(ptr: *Temp, disp: i32, val_ty: Type, cg: *CodeGen) !Temp { + const AccessOptions = struct { + disp: i32 = 0, + safe: bool = false, + }; + + fn load(ptr: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp { const val = try cg.tempAlloc(val_ty); + try ptr.toOffset(opts.disp, cg); + while (try ptr.toLea(cg)) {} const val_mcv = val.tracking(cg).short; switch (val_mcv) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), - .register => |val_reg| { + .register => |val_reg| try ptr.loadReg(val_ty, registerAlias( + val_reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + inline .register_pair, + .register_triple, + .register_quadruple, + => |val_regs| for (val_regs) |val_reg| { + try ptr.loadReg(val_ty, val_reg, cg); + try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg); while (try ptr.toLea(cg)) {} - try cg.genSetReg(val_reg, val_ty, ptr.tracking(cg).short.offset(disp).deref(), .{}); + }, + .register_offset => |val_reg_off| switch (val_reg_off.off) { + 0 => try ptr.loadReg(val_ty, registerAlias( + val_reg_off.reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + else => unreachable, }, .memory, .indirect, .load_frame, .load_symbol => { - try ptr.toOffset(disp, cg); var val_ptr = try cg.tempInit(.usize, val_mcv.address()); var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); try val_ptr.memcpy(ptr, &len, cg); @@ -28375,65 +28437,184 @@ const Temp = struct { return val; } - fn store(ptr: *Temp, disp: i32, val: *Temp, cg: *CodeGen) !void { + fn store(ptr: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void { const val_ty = val.typeOf(cg); - val: switch (val.tracking(cg).short) { - else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv 
}), - .immediate => |val_imm| { - const val_op: Immediate = if (std.math.cast(u32, val_imm)) |val_uimm32| - .u(val_uimm32) - else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| - .s(val_simm32) - else - continue :val .{ .register = undefined }; - while (try ptr.toLea(cg)) {} - try cg.asmMemoryImmediate( - .{ ._, .mov }, - try ptr.tracking(cg).short.deref().mem(cg, .{ - .size = cg.memSize(val_ty), - .disp = disp, - }), - val_op, - ); - }, - .register => { - while (try ptr.toLea(cg) or try val.toRegClass(true, .general_purpose, cg)) {} - const val_reg = val.tracking(cg).short.register; - switch (val_reg.class()) { - .general_purpose => try cg.asmMemoryRegister( - .{ ._, .mov }, - try ptr.tracking(cg).short.deref().mem(cg, .{ - .size = cg.memSize(val_ty), - .disp = disp, - }), - registerAlias(val_reg, @intCast(val_ty.abiSize(cg.pt.zcu))), - ), - else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), - } - }, - } - } - - fn read(src: *Temp, disp: i32, val_ty: Type, cg: *CodeGen) !Temp { - var val = try cg.tempAlloc(val_ty); - while (try src.toBase(cg)) {} - val_to_gpr: while (true) : (while (try val.toRegClass(false, .general_purpose, cg)) {}) { + try ptr.toOffset(opts.disp, cg); + while (try ptr.toLea(cg)) {} + val_to_gpr: while (true) : (while (try ptr.toLea(cg) or + try val.toRegClass(false, .general_purpose, cg)) + {}) { const val_mcv = val.tracking(cg).short; switch (val_mcv) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), - .register => |val_reg| try src.readReg(disp, val_ty, registerAlias( + .undef => if (opts.safe) { + var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa }); + var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); + try ptr.memset(&pat, &len, cg); + try pat.die(cg); + try len.die(cg); + }, + .immediate => |val_imm| { + const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31| + .u(val_uimm31) + else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| + .s(val_simm32) + else + continue :val_to_gpr; + // hack around linker relocation bugs + switch (ptr.tracking(cg).short) { + else => {}, + .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {}, + } + try cg.asmMemoryImmediate( + .{ ._, .mov }, + try ptr.tracking(cg).short.deref().mem(cg, .{ + .size = cg.memSize(val_ty), + }), + val_op, + ); + }, + .eflags => |cc| { + // hack around linker relocation bugs + switch (ptr.tracking(cg).short) { + else => {}, + .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {}, + } + try cg.asmSetccMemory( + cc, + try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }), + ); + }, + .register => |val_reg| try ptr.storeReg(val_ty, registerAlias( + val_reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + inline .register_pair, + .register_triple, + .register_quadruple, + => |val_regs| for (val_regs) |val_reg| { + try ptr.storeReg(val_ty, val_reg, cg); + try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg); + while (try ptr.toLea(cg)) {} + }, + .register_offset => |val_reg_off| switch (val_reg_off.off) { + 0 => try ptr.storeReg(val_ty, registerAlias( + val_reg_off.reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + else => continue :val_to_gpr, + }, + .register_overflow => |val_reg_ov| { + const ip = &cg.pt.zcu.intern_pool; + const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) { + .tuple_type => |tuple_type| { + const tuple_field_types = tuple_type.types.get(ip); + assert(tuple_field_types.len == 
2 and tuple_field_types[1] == .u1_type); + break :first_ty tuple_field_types[0]; + }, + .opt_type => |opt_child| { + assert(!val_ty.optionalReprIsPayload(cg.pt.zcu)); + break :first_ty opt_child; + }, + else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }), + }); + const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu)); + try ptr.storeReg(first_ty, registerAlias(val_reg_ov.reg, first_size), cg); + try ptr.toOffset(first_size, cg); + try cg.asmSetccMemory( + val_reg_ov.eflags, + try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }), + ); + }, + .lea_frame, .lea_symbol => continue :val_to_gpr, + .memory, .indirect, .load_frame, .load_symbol => { + var val_ptr = try cg.tempInit(.usize, val_mcv.address()); + var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); + try ptr.memcpy(&val_ptr, &len, cg); + try val_ptr.die(cg); + try len.die(cg); + }, + } + break; + } + } + + fn read(src: *Temp, val_ty: Type, opts: AccessOptions, cg: *CodeGen) !Temp { + var val = try cg.tempAlloc(val_ty); + while (try src.toBase(cg)) {} + const val_mcv = val.tracking(cg).short; + switch (val_mcv) { + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), + .register => |val_reg| try src.readReg(opts.disp, val_ty, registerAlias( + val_reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + inline .register_pair, .register_triple, .register_quadruple => |val_regs| { + var disp = opts.disp; + for (val_regs) |val_reg| { + try src.readReg(disp, val_ty, val_reg, cg); + disp += @divExact(val_reg.bitSize(), 8); + } + }, + .register_offset => |val_reg_off| switch (val_reg_off.off) { + 0 => try src.readReg(opts.disp, val_ty, registerAlias( + val_reg_off.reg, + @intCast(val_ty.abiSize(cg.pt.zcu)), + ), cg), + else => unreachable, + }, + .memory, .indirect, .load_frame, .load_symbol => { + var val_ptr = try cg.tempInit(.usize, val_mcv.address()); + var src_ptr = + try cg.tempInit(.usize, src.tracking(cg).short.address().offset(opts.disp)); + var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); + try val_ptr.memcpy(&src_ptr, &len, cg); + try val_ptr.die(cg); + try src_ptr.die(cg); + try len.die(cg); + }, + } + return val; + } + + fn write(dst: *Temp, val: *Temp, opts: AccessOptions, cg: *CodeGen) !void { + const val_ty = val.typeOf(cg); + while (try dst.toBase(cg)) {} + val_to_gpr: while (true) : (while (try dst.toBase(cg) or + try val.toRegClass(false, .general_purpose, cg)) + {}) { + const val_mcv = val.tracking(cg).short; + switch (val_mcv) { + else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), + .immediate => |val_imm| { + const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31| + .u(val_uimm31) + else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| + .s(val_simm32) + else + continue :val_to_gpr; + try cg.asmMemoryImmediate( + .{ ._, .mov }, + try dst.tracking(cg).short.mem(cg, .{ + .size = cg.memSize(val_ty), + .disp = opts.disp, + }), + val_op, + ); + }, + .register => |val_reg| try dst.writeReg(opts.disp, val_ty, registerAlias( val_reg, @intCast(val_ty.abiSize(cg.pt.zcu)), ), cg), inline .register_pair, .register_triple, .register_quadruple => |val_regs| { - var part_disp = disp; + var disp = opts.disp; for (val_regs) |val_reg| { - try src.readReg(disp, val_ty, val_reg, cg); - part_disp += @divExact(val_reg.bitSize(), 8); + try dst.writeReg(disp, val_ty, val_reg, cg); + disp += @divExact(val_reg.bitSize(), 8); } }, .register_offset => |val_reg_off| switch 
(val_reg_off.off) { - 0 => try src.readReg(disp, val_ty, registerAlias( + 0 => try dst.writeReg(opts.disp, val_ty, registerAlias( val_reg_off.reg, @intCast(val_ty.abiSize(cg.pt.zcu)), ), cg), @@ -28441,16 +28622,61 @@ const Temp = struct { }, .lea_frame, .lea_symbol => continue :val_to_gpr, .memory, .indirect, .load_frame, .load_symbol => { + var dst_ptr = + try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp)); var val_ptr = try cg.tempInit(.usize, val_mcv.address()); - var src_ptr = try cg.tempInit(.usize, src.tracking(cg).short.address().offset(disp)); var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); - try val_ptr.memcpy(&src_ptr, &len, cg); + try dst_ptr.memcpy(&val_ptr, &len, cg); + try dst_ptr.die(cg); try val_ptr.die(cg); - try src_ptr.die(cg); try len.die(cg); }, } - return val; + break; + } + } + + fn loadReg(ptr: *Temp, dst_ty: Type, dst_reg: Register, cg: *CodeGen) !void { + const dst_rc = dst_reg.class(); + const strat = try cg.moveStrategy(dst_ty, dst_rc, false); + // hack around linker relocation bugs + switch (ptr.tracking(cg).short) { + else => {}, + .lea_symbol => |sym_off| if (dst_rc != .general_purpose or sym_off.off != 0) + while (try ptr.toRegClass(false, .general_purpose, cg)) {}, + } + try strat.read(cg, dst_reg, try ptr.tracking(cg).short.deref().mem(cg, .{ + .size = .fromBitSize(@min(8 * dst_ty.abiSize(cg.pt.zcu), dst_reg.bitSize())), + })); + } + + fn storeReg(ptr: *Temp, src_ty: Type, src_reg: Register, cg: *CodeGen) !void { + const src_rc = src_reg.class(); + const src_abi_size = src_ty.abiSize(cg.pt.zcu); + const strat = try cg.moveStrategy(src_ty, src_rc, false); + // hack around linker relocation bugs + switch (ptr.tracking(cg).short) { + else => {}, + .lea_symbol => |sym_off| if (src_rc != .general_purpose or sym_off.off != 0) + while (try ptr.toRegClass(false, .general_purpose, cg)) {}, + } + if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) { + try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{ + .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())), + }), src_reg); + } else { + const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu); + const frame_index = try cg.allocFrameIndex(frame_alloc); + const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size); + try strat.write(cg, .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = frame_size } }, + }, src_reg); + var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); + var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); + try ptr.memcpy(&src_ptr, &len, cg); + try src_ptr.die(cg); + try len.die(cg); } } @@ -28462,62 +28688,6 @@ const Temp = struct { })); } - fn write(dst: *Temp, disp: i32, val: *Temp, cg: *CodeGen) !void { - const val_ty = val.typeOf(cg); - while (try dst.toBase(cg)) {} - val_to_gpr: while (true) : (while (try val.toRegClass(false, .general_purpose, cg)) {}) { - const val_mcv = val.tracking(cg).short; - switch (val_mcv) { - else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), - .immediate => |val_imm| { - const val_op: Immediate = if (std.math.cast(u32, val_imm)) |val_uimm32| - .u(val_uimm32) - else if (std.math.cast(i32, @as(i64, @bitCast(val_imm)))) |val_simm32| - .s(val_simm32) - else - continue :val_to_gpr; - try cg.asmMemoryImmediate( - .{ ._, .mov }, - try dst.tracking(cg).short.mem(cg, .{ - .size = cg.memSize(val_ty), - .disp = disp, - }), - val_op, - ); - }, - .register => |val_reg| try dst.writeReg(disp, val_ty, 
registerAlias( - val_reg, - @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), - inline .register_pair, .register_triple, .register_quadruple => |val_regs| { - var part_disp = disp; - for (val_regs) |val_reg| { - try dst.writeReg(part_disp, val_ty, val_reg, cg); - part_disp += @divExact(val_reg.bitSize(), 8); - } - }, - .register_offset => |val_reg_off| switch (val_reg_off.off) { - 0 => try dst.writeReg(disp, val_ty, registerAlias( - val_reg_off.reg, - @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), - else => continue :val_to_gpr, - }, - .lea_frame, .lea_symbol => continue :val_to_gpr, - .memory, .indirect, .load_frame, .load_symbol => { - var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(disp)); - var val_ptr = try cg.tempInit(.usize, val_mcv.address()); - var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); - try dst_ptr.memcpy(&val_ptr, &len, cg); - try dst_ptr.die(cg); - try val_ptr.die(cg); - try len.die(cg); - }, - } - return; - } - } - fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) !void { const src_rc = src_reg.class(); const src_abi_size = src_ty.abiSize(cg.pt.zcu); @@ -28552,6 +28722,13 @@ const Temp = struct { try cg.asmOpOnly(.{ .@"rep _sb", .mov }); } + fn memset(dst: *Temp, val: *Temp, len: *Temp, cg: *CodeGen) !void { + while (true) for ([_]*Temp{ dst, val, len }, [_]Register{ .rdi, .rax, .rcx }) |temp, reg| { + if (try temp.toReg(reg, cg)) break; + } else break; + try cg.asmOpOnly(.{ .@"rep _sb", .sto }); + } + fn moveTo(temp: Temp, inst: Air.Inst.Index, cg: *CodeGen) !void { if (cg.liveness.isUnused(inst)) try temp.die(cg) else switch (temp.unwrap(cg)) { .ref => { diff --git a/test/behavior/basic.zig b/test/behavior/basic.zig index e639ef68b0..e8f4f15259 100644 --- a/test/behavior/basic.zig +++ b/test/behavior/basic.zig @@ -1169,10 +1169,10 @@ test "arrays and vectors with big integers" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; inline for (.{ u65528, u65529, u65535 }) |Int| { var a: [1]Int = undefined; From c3d33440f0e68f114996dc74ec15e2b9514c4b3e Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Thu, 9 Jan 2025 13:38:25 -0500 Subject: [PATCH 24/25] x86_64: pass more behavior tests --- src/arch/x86_64/CodeGen.zig | 755 +++++++++++++++++---------------- test/behavior/basic.zig | 2 +- test/behavior/cast.zig | 1 - test/behavior/math.zig | 3 +- test/behavior/optional.zig | 2 +- test/behavior/packed-union.zig | 1 - test/behavior/vector.zig | 5 +- 7 files changed, 382 insertions(+), 387 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index fcd2e4087c..d291ec7da9 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -8998,7 +8998,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }) else fallback: { const bin_op = air_datas[@intFromEnum(inst)].bin_op; const scalar_ty = cg.typeOf(bin_op.lhs).scalarType(zcu); - if (scalar_ty.isRuntimeFloat() or 
ip.isOptionalType(scalar_ty.toIntern())) break :fallback try cg.airCmp(inst, switch (air_tag) { + if (intInfo(scalar_ty, cg) == null) break :fallback try cg.airCmp(inst, switch (air_tag) { else => unreachable, .cmp_eq, .cmp_eq_optimized => .eq, .cmp_neq, .cmp_neq_optimized => .neq, @@ -9012,136 +9012,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .e, .ne => |cc| comptime &.{ .{ - .required_features = .{ .avx2, null, null, null }, - .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, - .patterns = &.{ - .{ .src = .{ .to_ymm, .mem } }, - .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_ymm, .to_ymm } }, - }, - .extra_temps = .{ - .{ .kind = .{ .rc = .sse } }, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .cc = cc }}, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ }, - .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, - .patterns = &.{ - .{ .src = .{ .to_ymm, .mem } }, - .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_ymm, .to_ymm } }, - }, - .extra_temps = .{ - .{ .kind = .{ .rc = .sse } }, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .cc = cc }}, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ }, - .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_xmm, .mem } }, - .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_xmm, .to_xmm } }, - }, - .extra_temps = .{ - .{ .kind = .{ .rc = .sse } }, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .cc = cc }}, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ }, - .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse4_1, null, null, null }, - .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_xmm, .mem } }, - .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_xmm, .to_xmm } }, - }, - .dst_temps = .{.{ .cc = cc }}, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, - .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_xmm, .mem } }, - .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_xmm, .to_xmm } }, - }, - .extra_temps = .{ - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .sse } }, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .cc = cc }}, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, - .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, - .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .si(0xffff), ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, .mmx, null, null }, - 
.src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_mm, .mem } }, - .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_mm, .to_mm } }, - }, - .extra_temps = .{ - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .mmx } }, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{.{ .cc = cc }}, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, - .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ }, - .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ }, - .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, - .{ ._, ._, .xor, .tmp0d, .si(0xff), ._, ._ }, - } }, - }, .{ .src_constraints = .{ .{ .int = .byte }, .{ .int = .byte } }, .patterns = &.{ .{ .src = .{ .mem, .imm8 } }, @@ -9206,6 +9076,136 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .each = .{ .once = &.{ .{ ._, ._, .cmp, .src0q, .src1q, ._, ._ }, } }, + }, .{ + .required_features = .{ .sse, .mmx, null, null }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .mmx } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .p_, .xor, .tmp1q, .tmp1q, ._, ._ }, + .{ ._, .p_, .xor, .src0q, .src1q, ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp1q, .src0q, ._, ._ }, + .{ ._, .p_b, .movmsk, .tmp0d, .tmp1q, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .si(0xff), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, + }, + .extra_temps = .{ + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .vp_, .xor, .tmp0x, .src0x, .src1x, ._ }, + .{ ._, .vp_, .@"test", .tmp0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, + .{ ._, .p_, .@"test", .src0x, .src0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, + }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, .p_, .xor, .src0x, .src1x, ._, ._ }, + .{ ._, .p_b, .cmpeq, .tmp1x, .src0x, ._, ._ }, + .{ ._, 
.p_b, .movmsk, .tmp0d, .tmp1x, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .si(0xffff), ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, + }, + .extra_temps = .{ + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .vp_, .xor, .tmp0y, .src0y, .src1y, ._ }, + .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .int = .yword }, .{ .int = .yword } }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, + }, + .extra_temps = .{ + .{ .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .cc = cc }}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .v_pd, .xor, .tmp0y, .src0y, .src1y, ._ }, + .{ ._, .vp_, .@"test", .tmp0y, .tmp0y, ._, ._ }, + } }, }, .{ .required_features = .{ .avx2, null, null, null }, .src_constraints = .{ @@ -19540,10 +19540,19 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv .air_ref, => unreachable, // unmodifiable destination .register => |dst_reg| { - const dst_alias = registerAlias(dst_reg, abi_size); + const alias_size = switch (abi_size) { + 1 => 4, + else => abi_size, + }; + const dst_alias = registerAlias(dst_reg, alias_size); const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + switch (abi_size) { + 1 => try self.asmRegisterRegister(.{ ._, .movzx }, dst_reg.to32(), dst_reg.to8()), + else => {}, + } + const resolved_src_mcv = switch (src_mcv) { else => src_mcv, .air_ref => |src_ref| try self.resolveInst(src_ref), @@ -19562,19 +19571,20 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv .reserved_frame, .air_ref, => unreachable, - .register => |src_reg| try self.asmRegisterRegister( - .{ .i_, .mul }, - dst_alias, - registerAlias(src_reg, abi_size), - ), + .register => |src_reg| { + switch (abi_size) { + 1 => try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8()), + else => {}, + } + try self.asmRegisterRegister( + .{ .i_, .mul }, + dst_alias, + registerAlias(src_reg, alias_size), + ); + }, .immediate => |imm| { - if (std.math.cast(i32, imm)) |small| { - try self.asmRegisterRegisterImmediate( - .{ .i_, .mul }, - dst_alias, - dst_alias, - .s(small), - ); + if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| { + try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst_alias, dst_alias, .s(small)); } else { const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv); return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); @@ -19591,47 +19601,57 @@ fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv .load_tlv, .lea_tlv, .lea_frame, - => try self.asmRegisterRegister( - .{ .i_, .mul }, - dst_alias, - registerAlias(try self.copyToTmpRegister(dst_ty, resolved_src_mcv), abi_size), - ), - .memory, .indirect, .load_frame => try self.asmRegisterMemory( - .{ .i_, .mul }, - dst_alias, - switch (resolved_src_mcv) { - .memory => |addr| 
.{ - .base = .{ .reg = .ds }, - .mod = .{ .rm = .{ - .size = .fromSize(abi_size), - .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse - return self.asmRegisterRegister( - .{ .i_, .mul }, - dst_alias, - registerAlias( - try self.copyToTmpRegister(dst_ty, resolved_src_mcv), - abi_size, - ), - ), - } }, - }, - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .mod = .{ .rm = .{ - .size = .fromSize(abi_size), - .disp = reg_off.off, - } }, - }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .mod = .{ .rm = .{ - .size = .fromSize(abi_size), - .disp = frame_addr.off, - } }, - }, - else => unreachable, + => { + const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv); + switch (abi_size) { + 1 => try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8()), + else => {}, + } + try self.asmRegisterRegister(.{ .i_, .mul }, dst_alias, registerAlias(src_reg, alias_size)); + }, + .memory, .indirect, .load_frame => switch (abi_size) { + 1 => { + const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv); + try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8()); + try self.asmRegisterRegister(.{ .i_, .mul }, dst_alias, registerAlias(src_reg, alias_size)); }, - ), + else => try self.asmRegisterMemory( + .{ .i_, .mul }, + dst_alias, + switch (resolved_src_mcv) { + .memory => |addr| .{ + .base = .{ .reg = .ds }, + .mod = .{ .rm = .{ + .size = .fromSize(abi_size), + .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse + return self.asmRegisterRegister( + .{ .i_, .mul }, + dst_alias, + registerAlias( + try self.copyToTmpRegister(dst_ty, resolved_src_mcv), + abi_size, + ), + ), + } }, + }, + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .mod = .{ .rm = .{ + .size = .fromSize(abi_size), + .disp = reg_off.off, + } }, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .mod = .{ .rm = .{ + .size = .fromSize(abi_size), + .disp = frame_addr.off, + } }, + }, + else => unreachable, + }, + ), + }, } }, .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented @@ -27952,6 +27972,75 @@ fn promoteVarArg(self: *CodeGen, ty: Type) Type { } } +fn intInfo(ty: Type, cg: *CodeGen) ?std.builtin.Type.Int { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + var ty_index = ty.ip_index; + while (true) switch (ip.indexToKey(ty_index)) { + .int_type => |int_type| return int_type, + .ptr_type => |ptr_type| return switch (ptr_type.flags.size) { + .one, .many, .c => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() }, + .slice => null, + }, + .opt_type => |opt_child| return if (!Type.fromInterned(opt_child).hasRuntimeBitsIgnoreComptime(zcu)) + .{ .signedness = .unsigned, .bits = 1 } + else switch (ip.indexToKey(opt_child)) { + .ptr_type => |ptr_type| switch (ptr_type.flags.size) { + .one, .many => switch (ptr_type.flags.is_allowzero) { + false => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() }, + true => null, + }, + .slice, .c => null, + }, + else => null, + }, + .error_union_type => |error_union_type| return if (!Type.fromInterned(error_union_type.payload_type) + .hasRuntimeBitsIgnoreComptime(zcu)) .{ .signedness = .unsigned, .bits = zcu.errorSetBits() } else null, + .simple_type => |simple_type| return switch (simple_type) { + .bool => .{ .signedness = .unsigned, .bits = 1 }, + .anyerror => .{ .signedness = .unsigned, .bits = zcu.errorSetBits() }, + .isize => .{ .signedness = .signed, .bits = 
cg.target.ptrBitWidth() }, + .usize => .{ .signedness = .unsigned, .bits = cg.target.ptrBitWidth() }, + .c_char => .{ .signedness = cg.target.charSignedness(), .bits = cg.target.cTypeBitSize(.char) }, + .c_short => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.short) }, + .c_ushort => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.short) }, + .c_int => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.int) }, + .c_uint => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.int) }, + .c_long => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.long) }, + .c_ulong => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.long) }, + .c_longlong => .{ .signedness = .signed, .bits = cg.target.cTypeBitSize(.longlong) }, + .c_ulonglong => .{ .signedness = .unsigned, .bits = cg.target.cTypeBitSize(.longlong) }, + .f16, .f32, .f64, .f80, .f128, .c_longdouble => null, + .anyopaque, + .void, + .type, + .comptime_int, + .comptime_float, + .noreturn, + .null, + .undefined, + .enum_literal, + .adhoc_inferred_error_set, + .generic_poison, + => unreachable, + }, + .struct_type => { + const loaded_struct = ip.loadStructType(ty_index); + switch (loaded_struct.layout) { + .auto, .@"extern" => return null, + .@"packed" => ty_index = loaded_struct.backingIntTypeUnordered(ip), + } + }, + .union_type => return switch (ip.loadUnionType(ty_index).flagsUnordered(ip).layout) { + .auto, .@"extern" => null, + .@"packed" => .{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)) }, + }, + .enum_type => ty_index = ip.loadEnumType(ty_index).tag_ty, + .error_set_type, .inferred_error_set_type => return .{ .signedness = .unsigned, .bits = zcu.errorSetBits() }, + else => return null, + }; +} + const Temp = struct { index: Air.Inst.Index, @@ -29061,197 +29150,112 @@ const Select = struct { fn accepts(constraint: Constraint, ty: Type, cg: *CodeGen) bool { const zcu = cg.pt.zcu; - switch (constraint) { - .any => return true, - .any_bool_vec => return ty.isVector(zcu) and ty.childType(zcu).toIntern() == .bool_type, - .any_int => return ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu) or ty.isAbiInt(zcu), - .any_signed_int => return ty.isAbiInt(zcu) and ty.intInfo(zcu).signedness == .signed, - .any_float => return ty.isRuntimeFloat(), - .po2_any => return std.math.isPowerOfTwo(ty.abiSize(zcu)), - .bool_vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and + return switch (constraint) { + .any => true, + .any_bool_vec => ty.isVector(zcu) and ty.childType(zcu).toIntern() == .bool_type, + .any_int => intInfo(ty, cg) != null, + .any_signed_int => if (intInfo(ty, cg)) |int_info| int_info.signedness == .signed else false, + .any_float => ty.isRuntimeFloat(), + .po2_any => std.math.isPowerOfTwo(ty.abiSize(zcu)), + .bool_vec => |size| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() == .bool_type and size.bitSize(cg.target) >= ty.vectorLen(zcu), - .vec => |size| return ty.isVector(zcu) and ty.scalarType(zcu).toIntern() != .bool_type and + .vec => |size| ty.isVector(zcu) and ty.scalarType(zcu).toIntern() != .bool_type and size.bitSize(cg.target) >= ty.abiSize(zcu), - .signed_int_vec => |size| { - if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false; - const scalar_ty = ty.scalarType(zcu); - return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .signed; - }, - .signed_int_or_full_vec => |size| { - if (!ty.isVector(zcu) or size.bitSize(cg.target) < 8 * ty.abiSize(zcu)) return false; - 
const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return true; - if (!scalar_ty.isAbiInt(zcu)) return false; - const scalar_int_info = scalar_ty.intInfo(zcu); - return switch (scalar_int_info.signedness) { - .signed => true, - .unsigned => scalar_int_info.bits >= 8 and std.math.isPowerOfTwo(scalar_int_info.bits), - }; - }, - .unsigned_int_vec => |size| { - if (!ty.isVector(zcu) or size.bitSize(cg.target) < ty.bitSize(zcu)) return false; - const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return true; - return scalar_ty.isAbiInt(zcu) and scalar_ty.intInfo(zcu).signedness == .unsigned; - }, - .int_or_vec => |size| { - if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and - size.bitSize(cg.target) >= 8 * ty.abiSize(zcu); - if (ty.toIntern() == .bool_type) return true; - if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); - return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits; - }, - .exact_remainder_int_or_vec => |of_is| { - if (ty.isVector(zcu)) return ty.scalarType(zcu).toIntern() != .bool_type and - of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1; - if (ty.isPtrAtRuntime(zcu)) - return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; - if (!ty.isAbiInt(zcu)) return false; - return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; - }, - .int => |size| { - if (ty.toIntern() == .bool_type) return true; - if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); - return ty.isAbiInt(zcu) and size.bitSize(cg.target) >= ty.intInfo(zcu).bits; - }, - .scalar_int => |size| { - const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); - return scalar_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= scalar_ty.intInfo(zcu).bits; - }, - .scalar_signed_int => |size| { - const scalar_ty = ty.scalarType(zcu); - if (!scalar_ty.isAbiInt(zcu)) return false; - const scalar_int_info = scalar_ty.intInfo(zcu); - return scalar_int_info.signedness == .signed and size.bitSize(cg.target) >= scalar_int_info.bits; - }, - .scalar_unsigned_int => |size| { - const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); - if (!scalar_ty.isAbiInt(zcu)) return false; - const scalar_int_info = scalar_ty.intInfo(zcu); - return scalar_int_info.signedness == .unsigned and size.bitSize(cg.target) >= scalar_int_info.bits; - }, - .scalar_remainder_int => |of_is| { - const scalar_ty = ty.scalarType(zcu); - if (scalar_ty.isPtrAtRuntime(zcu)) - return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; - if (!scalar_ty.isAbiInt(zcu)) return false; - return of_is.is.bitSize(cg.target) >= (scalar_ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; - }, - .exact_int => |bit_size| { - if (ty.toIntern() == .bool_type) return bit_size == 1; - if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth(); - return ty.isAbiInt(zcu) and bit_size == ty.intInfo(zcu).bits; - }, - .exact_signed_int => |bit_size| { - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return int_info.signedness == .signed and bit_size == int_info.bits; - }, - .exact_unsigned_int => |bit_size| { - if (ty.toIntern() == .bool_type) return bit_size == 
1; - if (ty.isPtrAtRuntime(zcu)) return bit_size == cg.target.ptrBitWidth(); - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return int_info.signedness == .unsigned and bit_size == int_info.bits; - }, - .signed_or_exact_int => |size| { - if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) == cg.target.ptrBitWidth(); - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return switch (int_info.signedness) { - .signed => size.bitSize(cg.target) >= int_info.bits, - .unsigned => size.bitSize(cg.target) == int_info.bits, - }; - }, - .unsigned_or_exact_int => |size| { - if (ty.toIntern() == .bool_type or ty.isPtrAtRuntime(zcu)) return true; - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return switch (int_info.signedness) { - .signed => size.bitSize(cg.target) == int_info.bits, - .unsigned => size.bitSize(cg.target) >= int_info.bits, - }; - }, - .po2_int => |size| { - if (ty.toIntern() == .bool_type) return true; - if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); - if (!ty.isAbiInt(zcu)) return false; - const bit_size = ty.intInfo(zcu).bits; - return std.math.isPowerOfTwo(bit_size) and size.bitSize(cg.target) >= bit_size; - }, - .signed_po2_int => |size| { - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return int_info.signedness == .signed and std.math.isPowerOfTwo(int_info.bits) and - size.bitSize(cg.target) >= int_info.bits; - }, - .unsigned_po2_or_exact_int => |size| { - if (ty.toIntern() == .bool_type) return true; - if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return switch (int_info.signedness) { - .signed => size.bitSize(cg.target) == int_info.bits, - .unsigned => std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits, - }; - }, - .remainder_int => |of_is| { - if (ty.toIntern() == .bool_type) return true; - if (ty.isPtrAtRuntime(zcu)) - return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; - if (!ty.isAbiInt(zcu)) return false; - return of_is.is.bitSize(cg.target) >= (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; - }, - .exact_remainder_int => |of_is| { - if (ty.isPtrAtRuntime(zcu)) - return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; - if (!ty.isAbiInt(zcu)) return false; - return of_is.is.bitSize(cg.target) == (ty.intInfo(zcu).bits - 1) % of_is.of.bitSize(cg.target) + 1; - }, - .signed_or_exact_remainder_int => |of_is| { - if (ty.isPtrAtRuntime(zcu)) - return of_is.is.bitSize(cg.target) == (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return switch (int_info.signedness) { - .signed => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, - .unsigned => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, - }; - }, - .unsigned_or_exact_remainder_int => |of_is| { - if (ty.toIntern() == .bool_type) return true; - if (ty.isPtrAtRuntime(zcu)) - return of_is.is.bitSize(cg.target) >= (cg.target.ptrBitWidth() - 1) % of_is.of.bitSize(cg.target) + 1; - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return switch (int_info.signedness) { - .signed => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % 
of_is.of.bitSize(cg.target) + 1, - .unsigned => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, - }; - }, - .signed_int => |size| { - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return int_info.signedness == .signed and size.bitSize(cg.target) >= int_info.bits; - }, - .unsigned_int => |size| { - if (ty.toIntern() == .bool_type) return true; - if (ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); - if (!ty.isAbiInt(zcu)) return false; - const int_info = ty.intInfo(zcu); - return int_info.signedness == .unsigned and size.bitSize(cg.target) >= int_info.bits; - }, - .elem_size_is => |size| return size == ty.elemType2(zcu).abiSize(zcu), - .po2_elem_size => return std.math.isPowerOfTwo(ty.elemType2(zcu).abiSize(zcu)), - .elem_int => |size| { - const elem_ty = ty.elemType2(zcu); - if (elem_ty.toIntern() == .bool_type) return true; - if (elem_ty.isPtrAtRuntime(zcu)) return size.bitSize(cg.target) >= cg.target.ptrBitWidth(); - return elem_ty.isAbiInt(zcu) and size.bitSize(cg.target) >= elem_ty.intInfo(zcu).bits; - }, - } + .signed_int_vec => |size| ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu) and + if (intInfo(ty.childType(zcu), cg)) |int_info| int_info.signedness == .signed else false, + .signed_int_or_full_vec => |size| ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu) and + if (intInfo(ty.childType(zcu), cg)) |int_info| switch (int_info.signedness) { + .signed => true, + .unsigned => int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits), + } else false, + .unsigned_int_vec => |size| ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu) and + if (intInfo(ty.childType(zcu), cg)) |int_info| int_info.signedness == .unsigned else false, + .int_or_vec => |size| if (intInfo(ty, cg)) |int_info| + size.bitSize(cg.target) >= int_info.bits + else + ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu), + .exact_remainder_int_or_vec => |of_is| if (intInfo(ty, cg)) |int_info| + of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 + else + ty.isVector(zcu) and ty.childType(zcu).toIntern() != .bool_type and + of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1, + .int => |size| if (intInfo(ty, cg)) |int_info| size.bitSize(cg.target) >= int_info.bits else false, + .scalar_int => |size| if (intInfo(ty.scalarType(zcu), cg)) |int_info| + size.bitSize(cg.target) >= int_info.bits + else + false, + .scalar_signed_int => |size| if (intInfo(ty.scalarType(zcu), cg)) |int_info| switch (int_info.signedness) { + .signed => size.bitSize(cg.target) >= int_info.bits, + .unsigned => false, + } else false, + .scalar_unsigned_int => |size| if (intInfo(ty.scalarType(zcu), cg)) |int_info| switch (int_info.signedness) { + .signed => false, + .unsigned => size.bitSize(cg.target) >= int_info.bits, + } else false, + .scalar_remainder_int => |of_is| if (intInfo(ty.scalarType(zcu), cg)) |int_info| + of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 + else + false, + .exact_int => |bit_size| if (intInfo(ty, cg)) |int_info| bit_size == int_info.bits else false, + .exact_signed_int => |bit_size| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => bit_size == int_info.bits, + .unsigned => false, + } else false, + .exact_unsigned_int => |bit_size| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => false, + .unsigned 
=> bit_size == int_info.bits, + } else false, + .signed_or_exact_int => |size| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => size.bitSize(cg.target) >= int_info.bits, + .unsigned => size.bitSize(cg.target) == int_info.bits, + } else false, + .unsigned_or_exact_int => |size| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => size.bitSize(cg.target) == int_info.bits, + .unsigned => size.bitSize(cg.target) >= int_info.bits, + } else false, + .po2_int => |size| if (intInfo(ty, cg)) |int_info| + std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits + else + false, + .signed_po2_int => |size| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits, + .unsigned => false, + } else false, + .unsigned_po2_or_exact_int => |size| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => size.bitSize(cg.target) == int_info.bits, + .unsigned => std.math.isPowerOfTwo(int_info.bits) and size.bitSize(cg.target) >= int_info.bits, + } else false, + .remainder_int => |of_is| if (intInfo(ty, cg)) |int_info| + of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 + else + false, + .exact_remainder_int => |of_is| if (intInfo(ty, cg)) |int_info| + of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 + else + false, + .signed_or_exact_remainder_int => |of_is| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + .unsigned => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + } else false, + .unsigned_or_exact_remainder_int => |of_is| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + .unsigned => of_is.is.bitSize(cg.target) >= (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1, + } else false, + .signed_int => |size| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => size.bitSize(cg.target) >= int_info.bits, + .unsigned => false, + } else false, + .unsigned_int => |size| if (intInfo(ty, cg)) |int_info| switch (int_info.signedness) { + .signed => false, + .unsigned => size.bitSize(cg.target) >= int_info.bits, + } else false, + .elem_size_is => |size| size == ty.elemType2(zcu).abiSize(zcu), + .po2_elem_size => std.math.isPowerOfTwo(ty.elemType2(zcu).abiSize(zcu)), + .elem_int => |size| if (intInfo(ty.elemType2(zcu), cg)) |elem_int_info| + size.bitSize(cg.target) >= elem_int_info.bits + else + false, + }; } }; @@ -29431,10 +29435,7 @@ const Select = struct { }), }, else => { - const scalar_info: InternPool.Key.IntType = if (scalar_ty.isAbiInt(zcu)) - scalar_ty.intInfo(zcu) - else - .{ .signedness = .unsigned, .bits = @intCast(scalar_ty.bitSize(zcu)) }; + const scalar_info = intInfo(scalar_ty, cg).?; const scalar_int_ty = try pt.intType(scalar_info.signedness, scalar_info.bits); if (scalar_info.bits <= 64) { const int_val: i64 = switch (spec.kind) { diff --git a/test/behavior/basic.zig b/test/behavior/basic.zig index e8f4f15259..94e68e62e4 100644 --- a/test/behavior/basic.zig +++ b/test/behavior/basic.zig @@ -1169,10 +1169,10 @@ test "arrays and vectors with big integers" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == 
.stage2_sparc64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; inline for (.{ u65528, u65529, u65535 }) |Int| { var a: [1]Int = undefined; diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 84c634bb0e..8c2be13923 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -2539,7 +2539,6 @@ test "@intFromBool on vector" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; const S = struct { diff --git a/test/behavior/math.zig b/test/behavior/math.zig index ffd0310ab9..fb325a95b6 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -472,7 +472,6 @@ test "division" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -588,7 +587,7 @@ fn testFloatDivision() !void { } test "large integer division" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; diff --git a/test/behavior/optional.zig b/test/behavior/optional.zig index 967bc8cf85..8dfc8ad22f 100644 --- a/test/behavior/optional.zig +++ b/test/behavior/optional.zig @@ -57,7 +57,7 @@ fn testNullPtrsEql() !void { test "optional with zero-bit type" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; const S = struct { diff --git a/test/behavior/packed-union.zig b/test/behavior/packed-union.zig index 701c0484a4..b70a16f354 100644 --- a/test/behavior/packed-union.zig +++ b/test/behavior/packed-union.zig @@ -138,7 +138,6 @@ test "packed union initialized with a runtime value" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return 
error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; const Fields = packed struct { diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 6af4b5b4b6..2dbd1de8eb 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -205,7 +205,6 @@ test "array vector coercion - odd sizes" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -308,7 +307,6 @@ test "tuple to vector" { test "vector casts of sizes not divisible by 8" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -1363,7 +1361,7 @@ test "load packed vector element" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; var x: @Vector(2, u15) = .{ 1, 4 }; @@ -1411,7 +1409,6 @@ test "store vector with memset" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO From 8c8dfb35f398407319764f0f8998de34c5247ed6 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Tue, 14 Jan 2025 17:56:25 -0500 Subject: [PATCH 25/25] x86_64: fix crashes compiling the compiler and tests --- lib/std/Thread.zig | 4 +- lib/std/Thread/Pool.zig | 6 +- lib/std/crypto/aes/aesni.zig | 2 +- src/arch/x86_64/CodeGen.zig | 720 ++++++++++++++++++---------------- src/arch/x86_64/abi.zig | 8 +- src/main.zig | 8 + src/register_manager.zig | 78 ++-- test/behavior/x86_64/math.zig | 10 + 8 files changed, 446 insertions(+), 390 deletions(-) diff --git a/lib/std/Thread.zig b/lib/std/Thread.zig index 69dbcf3947..9650bf8373 100644 --- a/lib/std/Thread.zig +++ b/lib/std/Thread.zig @@ -372,9 +372,11 @@ pub const SpawnConfig = struct { // https://github.com/ziglang/zig/issues/157 /// Size in bytes of the Thread's stack - stack_size: usize = 16 * 1024 * 1024, + stack_size: usize = default_stack_size, /// The allocator to be used to allocate memory for the to-be-spawned thread allocator: ?std.mem.Allocator = null, + + 
pub const default_stack_size = 16 * 1024 * 1024; }; pub const SpawnError = error{ diff --git a/lib/std/Thread/Pool.zig b/lib/std/Thread/Pool.zig index 86bac7ce46..874050a35f 100644 --- a/lib/std/Thread/Pool.zig +++ b/lib/std/Thread/Pool.zig @@ -27,6 +27,7 @@ pub const Options = struct { allocator: std.mem.Allocator, n_jobs: ?usize = null, track_ids: bool = false, + stack_size: usize = std.Thread.SpawnConfig.default_stack_size, }; pub fn init(pool: *Pool, options: Options) !void { @@ -54,7 +55,10 @@ pub fn init(pool: *Pool, options: Options) !void { errdefer pool.join(spawned); for (pool.threads) |*thread| { - thread.* = try std.Thread.spawn(.{}, worker, .{pool}); + thread.* = try std.Thread.spawn(.{ + .stack_size = options.stack_size, + .allocator = allocator, + }, worker, .{pool}); spawned += 1; } } diff --git a/lib/std/crypto/aes/aesni.zig b/lib/std/crypto/aes/aesni.zig index 2793ff4184..fbf3e37300 100644 --- a/lib/std/crypto/aes/aesni.zig +++ b/lib/std/crypto/aes/aesni.zig @@ -4,7 +4,7 @@ const mem = std.mem; const debug = std.debug; const has_vaes = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .vaes); -const has_avx512f = builtin.cpu.arch == .x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); +const has_avx512f = builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f); /// A single AES block. pub const Block = struct { diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d291ec7da9..f30ecf34df 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -634,42 +634,14 @@ const InstTracking = struct { } fn reuseFrame(self: *InstTracking) void { - switch (self.long) { - .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } }, - else => {}, - } - self.short = switch (self.long) { - .none, - .unreach, - .undef, - .immediate, - .memory, - .load_direct, - .lea_direct, - .load_got, - .lea_got, - .load_tlv, - .lea_tlv, - .load_frame, - .lea_frame, - .load_symbol, - .lea_symbol, - => self.long, - .dead, - .eflags, - .register, - .register_pair, - .register_triple, - .register_quadruple, - .register_offset, - .register_overflow, - .register_mask, - .indirect, - .elementwise_regs_then_frame, - .reserved_frame, - .air_ref, - => unreachable, - }; + self.* = .init(switch (self.long) { + .none => switch (self.short) { + .dead => .none, + else => |short| short, + }, + .reserved_frame => |index| .{ .load_frame = .{ .index = index } }, + else => |long| long, + }); } fn trackSpill(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { @@ -681,6 +653,15 @@ const InstTracking = struct { fn verifyMaterialize(self: InstTracking, target: InstTracking) void { switch (self.long) { .none, + .load_frame, + .reserved_frame, + => switch (target.long) { + .none, + .load_frame, + .reserved_frame, + => {}, + else => unreachable, + }, .unreach, .undef, .immediate, @@ -695,15 +676,6 @@ const InstTracking = struct { .load_symbol, .lea_symbol, => assert(std.meta.eql(self.long, target.long)), - .load_frame, - .reserved_frame, - => switch (target.long) { - .none, - .load_frame, - .reserved_frame, - => {}, - else => unreachable, - }, .dead, .eflags, .register, @@ -754,10 +726,11 @@ const InstTracking = struct { tracking_log.debug("{} => {} (materialize)", .{ inst, self.* }); } - fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void { + fn resurrect(self: *InstTracking, function: 
*CodeGen, inst: Air.Inst.Index, scope_generation: u32) !void { switch (self.short) { .dead => |die_generation| if (die_generation >= scope_generation) { self.reuseFrame(); + try function.getValue(self.short, inst); tracking_log.debug("{} => {} (resurrect)", .{ inst, self.* }); }, else => {}, @@ -767,6 +740,7 @@ const InstTracking = struct { fn die(self: *InstTracking, function: *CodeGen, inst: Air.Inst.Index) !void { if (self.short == .dead) return; try function.freeValue(self.short); + if (self.long == .none) self.long = self.short; self.short = .{ .dead = function.scope_generation }; tracking_log.debug("{} => {} (death)", .{ inst, self.* }); } @@ -2359,7 +2333,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - @setEvalBranchQuota(1_600); + @setEvalBranchQuota(1_700); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2520,7 +2494,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ + if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, }, @@ -2625,7 +2599,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ops[1].tracking(cg), }), else => |e| return e, - }; + } else { // hack around Sema OPV bugs + res[0] = ops[0]; + } for (ops) |op| for (res) |r| { if (op.index == r.index) break; } else try op.die(cg); @@ -2637,7 +2613,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ + if (ty_pl.ty.toType().elemType2(zcu).hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{ty_pl.ty.toType()}, &ops, comptime &.{ .{ .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, }, @@ -2757,7 +2733,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ops[1].tracking(cg), }), else => |e| return e, - }; + } else { + // hack around Sema OPV bugs + res[0] = ops[0]; + } for (ops) |op| for (res) |r| { if (op.index == r.index) break; } else try op.die(cg); @@ -2799,79 +2778,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { })) { else => unreachable, inline .@"and", .@"or", .xor => |mir_tag| comptime &.{ .{ - .required_features = .{ .avx2, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, - .patterns = &.{ - .{ .src = .{ .to_ymm, .mem } }, - .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_ymm, .to_ymm } }, - }, - .dst_temps = .{.{ .rc = .sse }}, - .each = .{ .once = &.{ - .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .yword }, .{ .int_or_vec = .yword } }, - .patterns = &.{ - .{ .src = .{ .to_ymm, .mem } }, - .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_ymm, .to_ymm } }, - }, - .dst_temps = .{.{ .rc = .sse }}, - .each = .{ .once = &.{ - .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, 
null }, - .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_xmm, .mem } }, - .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_xmm, .to_xmm } }, - }, - .dst_temps = .{.{ .rc = .sse }}, - .each = .{ .once = &.{ - .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_xmm, .mem } }, - .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_xmm, .to_xmm } }, - }, - .dst_temps = .{.{ .ref = .src0 }}, - .each = .{ .once = &.{ - .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .xword }, .{ .int_or_vec = .xword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_xmm, .mem } }, - .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_xmm, .to_xmm } }, - }, - .dst_temps = .{.{ .ref = .src0 }}, - .each = .{ .once = &.{ - .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .mmx, null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, - .patterns = &.{ - .{ .src = .{ .to_mut_mm, .mem } }, - .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_mm, .to_mm } }, - }, - .dst_temps = .{.{ .ref = .src0 }}, - .each = .{ .once = &.{ - .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .int_or_vec = .byte }, .{ .int_or_vec = .byte } }, + .src_constraints = .{ .{ .size = .byte }, .{ .size = .byte } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm8 } }, .{ .src = .{ .imm8, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2889,7 +2796,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, mir_tag, .dst0b, .src1b, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int_or_vec = .word }, .{ .int_or_vec = .word } }, + .src_constraints = .{ .{ .size = .word }, .{ .size = .word } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm16 } }, .{ .src = .{ .imm16, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2907,7 +2814,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, mir_tag, .dst0w, .src1w, ._, ._ }, } }, }, .{ - .src_constraints = .{ .{ .int_or_vec = .dword }, .{ .int_or_vec = .dword } }, + .src_constraints = .{ .{ .size = .dword }, .{ .size = .dword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .imm32 } }, .{ .src = .{ .imm32, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2926,7 +2833,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .@"64bit", null, null, null }, - .src_constraints = .{ .{ .int_or_vec = .qword }, .{ .int_or_vec = .qword } }, + .src_constraints = .{ .{ .size = .qword }, .{ .size = .qword } }, .patterns = &.{ .{ .src = .{ .mut_mem, .simm32 } }, .{ .src = .{ .simm32, .mut_mem }, .commute = .{ 0, 1 } }, @@ -2944,11 +2851,80 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, mir_tag, .dst0q, .src1q, ._, ._ }, } }, }, .{ - .required_features = .{ .avx2, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, + .required_features = .{ .mmx, null, null, null }, + .src_constraints = .{ 
.{ .size = .qword }, .{ .size = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_mm, .mem } }, + .{ .src = .{ .mem, .to_mut_mm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_mm, .to_mm } }, }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, .p_, mir_tag, .dst0q, .src1q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_xmm, .mem } }, + .{ .src = .{ .mem, .to_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_xmm, .to_xmm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_, mir_tag, .dst0x, .src0x, .src1x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, .p_, mir_tag, .dst0x, .src1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .size = .xword }, .{ .size = .xword } }, + .patterns = &.{ + .{ .src = .{ .to_mut_xmm, .mem } }, + .{ .src = .{ .mem, .to_mut_xmm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_xmm, .to_xmm } }, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._ps, mir_tag, .dst0x, .src1x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .size = .yword }, .{ .size = .yword } }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .vp_, mir_tag, .dst0y, .src0y, .src1y, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .size = .yword }, .{ .size = .yword } }, + .patterns = &.{ + .{ .src = .{ .to_ymm, .mem } }, + .{ .src = .{ .mem, .to_ymm }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_ymm, .to_ymm } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, mir_tag, .dst0y, .src0y, .src1y, ._ }, + } }, + }, .{ + .required_features = .{ .avx2, null, null, null }, + .src_constraints = .{ .{ .multiple_size = .yword }, .{ .multiple_size = .yword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2972,10 +2948,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .yword, .is = .yword } }, - }, + .src_constraints = .{ .{ .multiple_size = .yword }, .{ .multiple_size = .yword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -2999,10 +2972,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - }, + .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3026,10 +2996,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { } }, }, .{ .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - }, + .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3053,10 +3020,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .xword, .is = .xword } }, - }, + .src_constraints = .{ .{ .multiple_size = .xword }, .{ .multiple_size = .xword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3080,10 +3044,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_features = .{ .mmx, null, null, null }, - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, - }, + .src_constraints = .{ .{ .multiple_size = .qword }, .{ .multiple_size = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -3106,10 +3067,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .src_constraints = .{ - .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, - .{ .exact_remainder_int_or_vec = .{ .of = .qword, .is = .qword } }, - }, + .src_constraints = .{ .{ .multiple_size = .qword }, .{ .multiple_size = .qword } }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem } }, }, @@ -6983,7 +6941,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const extra = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data; switch (extra.compareOperator()) { - .eq, .neq => {}, + .eq, .neq => if (cg.typeOf(extra.lhs).scalarType(zcu).isRuntimeFloat()) + break :fallback try cg.airCmpVector(inst), else => break :fallback try cg.airCmpVector(inst), } var ops = try cg.tempsFromOperands(inst, .{ extra.lhs, extra.rhs }); @@ -9763,22 +9722,20 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .auto, .@"extern" => @intCast(agg_ty.structFieldOffset(extra.field_index, zcu)), .@"packed" => break :fallback try cg.airStructFieldVal(inst), }; - if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) { - var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); - var res = try ops[0].read(field_ty, .{ .disp = field_off }, cg); - for (ops) |op| if (op.index != res.index) try op.die(cg); - try res.moveTo(inst, cg); - } else { - // hack around Sema OPV bugs - const res = try cg.tempInit(field_ty, .none); - try res.moveTo(inst, cg); - } + var ops = try cg.tempsFromOperands(inst, .{extra.struct_operand}); + // hack around Sema OPV bugs + var res = if (field_ty.hasRuntimeBitsIgnoreComptime(zcu)) + try ops[0].read(field_ty, .{ .disp = field_off }, cg) + else + try cg.tempInit(field_ty, .none); + for (ops) |op| if (op.index != res.index) try op.die(cg); + try res.moveTo(inst, cg); }, .set_union_tag => if (use_old) try cg.airSetUnionTag(inst) else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; const union_ty = cg.typeOf(bin_op.lhs).childType(zcu); - var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); const union_layout = 
union_ty.unionGetLayout(zcu); + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); // hack around Sema OPV bugs if (union_layout.tag_size > 0) try ops[0].store(&ops[1], .{ .disp = @intCast(union_layout.tagOffset()), @@ -9834,11 +9791,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .ptr_elem_val => try cg.airPtrElemVal(inst), } else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; + const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); try ops[0].toSlicePtr(cg); var res: [1]Temp = undefined; - const res_ty = cg.typeOf(bin_op.lhs).elemType2(zcu); - cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ + if (res_ty.hasRuntimeBitsIgnoreComptime(zcu)) cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ .dst_constraints = .{.{ .int = .byte }}, .patterns = &.{ .{ .src = .{ .to_gpr, .simm32 } }, @@ -9912,51 +9869,51 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .dst0q, .leasi(.qword, .src0, .@"8", .src1), ._, ._ }, } }, } }) catch |err| switch (err) { - error.SelectFailed => switch (res_ty.abiSize(zcu)) { - // hack around Sema OPV bugs - 0 => res[0] = try cg.tempInit(res_ty, .none), - else => |elem_size| { - while (true) for (&ops) |*op| { - if (try op.toRegClass(true, .general_purpose, cg)) break; - } else break; - const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); - const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); - if (!std.math.isPowerOfTwo(elem_size)) { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterRegisterImmediate( - .{ .i_, .mul }, - rhs_reg, - rhs_reg, - .u(elem_size), - ); - try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ - .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, - }); - } else if (elem_size > 8) { - try cg.spillEflagsIfOccupied(); - try cg.asmRegisterImmediate( - .{ ._l, .sh }, - rhs_reg, - .u(std.math.log2_int(u64, elem_size)), - ); - try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ - .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, - }); - } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + error.SelectFailed => { + const elem_size = res_ty.abiSize(zcu); + while (true) for (&ops) |*op| { + if (try op.toRegClass(true, .general_purpose, cg)) break; + } else break; + const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); + const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); + if (!std.math.isPowerOfTwo(elem_size)) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + rhs_reg, + rhs_reg, + .u(elem_size), + ); + try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ - .size = .qword, - .index = rhs_reg, - .scale = .fromFactor(@intCast(elem_size)), - } }, + .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, }); - res[0] = try ops[0].load(res_ty, .{}, cg); - }, + } else if (elem_size > 8) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( + .{ ._l, .sh }, + rhs_reg, + .u(std.math.log2_int(u64, elem_size)), + ); + try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + }); + } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ + .base = .{ .reg = lhs_reg }, + .mod = .{ .rm = .{ + .size = .qword, + .index = rhs_reg, + .scale = 
.fromFactor(@intCast(elem_size)), + } }, + }); + res[0] = try ops[0].load(res_ty, .{}, cg); }, else => |e| return e, - }; + } else { + // hack around Sema OPV bugs + res[0] = try cg.tempInit(res_ty, .none); + } for (ops) |op| for (res) |r| { if (op.index == r.index) break; } else try op.die(cg); @@ -10499,7 +10456,7 @@ fn restoreState(self: *CodeGen, state: State, deaths: []const Air.Inst.Index, co if (opts.resurrect) for ( self.inst_tracking.keys()[Temp.Index.max..state.inst_tracking_len], self.inst_tracking.values()[Temp.Index.max..state.inst_tracking_len], - ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation); + ) |inst, *tracking| try tracking.resurrect(self, inst, state.scope_generation); for (deaths) |death| try self.processDeath(death); const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).array.len]RegisterLock; @@ -10879,7 +10836,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_ty = self.typeOfIndex(inst); const result = @as(?MCValue, result: { - const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); + const src_abi_size: u31 = @intCast(src_ty.abiSize(zcu)); + const dst_abi_size: u31 = @intCast(dst_ty.abiSize(zcu)); const src_int_info = src_ty.intInfo(zcu); const dst_int_info = dst_ty.intInfo(zcu); @@ -10890,7 +10848,6 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { const src_mcv = try self.resolveInst(ty_op.operand); if (dst_ty.isVector(zcu)) { - const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); const max_abi_size = @max(dst_abi_size, src_abi_size); if (max_abi_size > self.vectorSize(.int)) break :result null; const has_avx = self.hasFeature(.avx); @@ -11060,7 +11017,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { else => src_int_info.bits, }; - const dst_mcv = if (dst_int_info.bits <= src_storage_bits and + const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.class() == .general_purpose else src_abi_size > 8) and + dst_int_info.bits <= src_storage_bits and std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable == std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { @@ -11079,8 +11037,8 @@ fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }; } - const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable; - const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable; + const src_limbs_len = std.math.divCeil(u31, src_abi_size, 8) catch unreachable; + const dst_limbs_len = @divExact(dst_abi_size, 8); const high_mcv: MCValue = if (dst_mcv.isBase()) dst_mcv.address().offset((src_limbs_len - 1) * 8).deref() @@ -12067,6 +12025,7 @@ fn genSetFrameTruncatedOverflowCompare( defer if (src_lock) |lock| self.register_manager.unlockReg(lock); const ty = tuple_ty.fieldType(0, zcu); + const ty_size = ty.abiSize(zcu); const int_info = ty.intInfo(zcu); const hi_bits = (int_info.bits - 1) % 64 + 1; @@ -12100,6 +12059,13 @@ fn genSetFrameTruncatedOverflowCompare( try self.asmSetccRegister(.ne, eq_reg.to8()); try self.genBinOpMir(.{ ._, .@"or" }, .u8, .{ .register = overflow_reg }, .{ .register = eq_reg }); } + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(tuple_ty.structFieldOffset(1, zcu)), + tuple_ty.fieldType(1, zcu), + if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne }, + .{}, + ); const payload_off: i32 = 
@intCast(tuple_ty.structFieldOffset(0, zcu)); if (hi_limb_off > 0) try self.genSetMem( @@ -12116,13 +12082,20 @@ fn genSetFrameTruncatedOverflowCompare( .{ .register = scratch_reg }, .{}, ); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(tuple_ty.structFieldOffset(1, zcu)), - tuple_ty.fieldType(1, zcu), - if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne }, - .{}, - ); + var ext_off: i32 = hi_limb_off + 8; + if (ext_off < ty_size) { + switch (int_info.signedness) { + .signed => try self.asmRegisterImmediate(.{ ._r, .sa }, scratch_reg.to64(), .s(63)), + .unsigned => try self.asmRegisterRegister(.{ ._, .xor }, scratch_reg.to32(), scratch_reg.to32()), + } + while (ext_off < ty_size) : (ext_off += 8) try self.genSetMem( + .{ .frame = frame_index }, + payload_off + ext_off, + limb_ty, + .{ .register = scratch_reg }, + .{}, + ); + } } fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { @@ -13581,9 +13554,12 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .{ ._, .bt }, .{ .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ .size = .qword } }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = @intCast(index_imm / 64 * 8), + } }, }, - .u(index_imm), + .u(index_imm % 64), ), else => try self.asmMemoryRegister( .{ ._, .bt }, @@ -13603,8 +13579,11 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .load_frame => switch (index_mcv) { .immediate => |index_imm| try self.asmMemoryImmediate( .{ ._, .bt }, - try array_mat_mcv.mem(self, .{ .size = .qword }), - .u(index_imm), + try array_mat_mcv.mem(self, .{ + .size = .qword, + .disp = @intCast(index_imm / 64 * 8), + }), + .u(index_imm % 64), ), else => try self.asmMemoryRegister( .{ ._, .bt }, @@ -13622,9 +13601,12 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { .base = .{ .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()), }, - .mod = .{ .rm = .{ .size = .qword } }, + .mod = .{ .rm = .{ + .size = .qword, + .disp = @intCast(index_imm / 64 * 8), + } }, }, - .u(index_imm), + .u(index_imm % 64), ), else => try self.asmMemoryRegister( .{ ._, .bt }, @@ -14451,13 +14433,18 @@ fn genByteSwap( return src_mcv; }, 9...16 => { - switch (src_mcv) { + const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) { + .register => { + const frame_index = try self.allocFrameIndex(.initSpill(src_ty, zcu)); + try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{}); + break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } }; + }, .register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { for (src_regs) |src_reg| try self.asmRegister(.{ ._, .bswap }, src_reg.to64()); return .{ .register_pair = .{ src_regs[1], src_regs[0] } }; - }, - else => {}, - } + } else src_mcv, + else => src_mcv, + }; const dst_regs = try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp); @@ -14465,18 +14452,18 @@ fn genByteSwap( defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); for (dst_regs, 0..) 
|dst_reg, limb_index| { - if (src_mcv.isBase()) { + if (mat_src_mcv.isBase()) { try self.asmRegisterMemory( .{ ._, if (has_movbe) .movbe else .mov }, dst_reg.to64(), - try src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }), + try mat_src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }), ); if (!has_movbe) try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); } else { try self.asmRegisterRegister( .{ ._, .mov }, dst_reg.to64(), - src_mcv.register_pair[limb_index].to64(), + mat_src_mcv.register_pair[limb_index].to64(), ); try self.asmRegister(.{ ._, .bswap }, dst_reg.to64()); } @@ -15680,6 +15667,15 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg); defer self.register_manager.unlockReg(ptr_lock); + const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) { + .register => if (src_bit_size > 64) { + const frame_index = try self.allocFrameIndex(.initSpill(src_ty, self.pt.zcu)); + try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{}); + break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } }; + } else src_mcv, + else => src_mcv, + }; + var limb_i: u16 = 0; while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) { const part_bit_off = if (limb_i == 0) src_bit_off else 0; @@ -15712,7 +15708,7 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.genSetReg(tmp_reg, limb_ty, src_mcv, .{}); + try self.genSetReg(tmp_reg, limb_ty, mat_src_mcv, .{}); switch (limb_i) { 0 => try self.genShiftBinOpMir( .{ ._l, .sh }, @@ -15743,8 +15739,8 @@ fn packedStore(self: *CodeGen, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) defer self.register_manager.unlockReg(tmp_lock); try self.genSetReg(tmp_reg, limb_ty, switch (limb_i) { - 0 => src_mcv, - else => src_mcv.address().offset(limb_i * limb_abi_size).deref(), + 0 => mat_src_mcv, + else => mat_src_mcv.address().offset(limb_i * limb_abi_size).deref(), }, .{}); try self.genBinOpMir(.{ ._, .@"and" }, limb_ty, tmp_mcv, .{ .immediate = part_mask }); try self.asmMemoryRegister( @@ -17228,7 +17224,7 @@ fn genMulDivBinOp( dst_mcv.address(), lhs_mcv.address(), rhs_mcv.address(), - .{ .immediate = src_info.bits }, + .{ .immediate = 8 * src_abi_size }, }, .{}); return dst_mcv; }, @@ -17246,7 +17242,8 @@ fn genMulDivBinOp( const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx }); defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); - const signedness = ty.intInfo(zcu).signedness; + const int_info = ty.intInfo(zcu); + const signedness = int_info.signedness; switch (tag) { .mul, .mul_wrap, @@ -17279,6 +17276,15 @@ fn genMulDivBinOp( }, }, ty, lhs_mcv, rhs_mcv); + switch (tag) { + .mul, .rem, .div_trunc, .div_exact => {}, + .mul_wrap => if (dst_ty.intInfo(zcu).bits < 8 * dst_abi_size) try self.truncateRegister( + dst_ty, + if (dst_abi_size <= 8) .rax else .rdx, + ), + else => unreachable, + } + if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) { .mul, .mul_wrap, .div_trunc, .div_exact => .rax, .rem => .rdx, @@ -21954,8 +21960,9 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { break :arg .{ .indirect = .{ .reg = try self.copyToTmpRegister(.usize, ptr_mcv) } }; }; }; - if (arg_mcv.getReg()) |reg| if 
(RegisterManager.indexOfRegIntoTracked(reg)) |_| { - _ = self.register_manager.lockReg(reg); + if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |tracked_index| { + try self.register_manager.getRegIndex(tracked_index, if (output == .none) inst else null); + _ = self.register_manager.lockRegIndexAssumeUnused(tracked_index); }; if (!std.mem.eql(u8, name, "_")) arg_map.putAssumeCapacityNoClobber(name, @intCast(args.items.len)); @@ -22881,7 +22888,7 @@ fn genCopy(self: *CodeGen, ty: Type, dst_mcv: MCValue, src_mcv: MCValue, opts: C try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[0].to64(), src_reg.to128()); try self.asmRegisterRegister(.{ ._ps, .movhl }, tmp_reg.to128(), src_reg.to128()); - try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), src_reg.to128()); + try self.asmRegisterRegister(.{ ._q, .mov }, dst_regs[1].to64(), tmp_reg.to128()); } return; } else unreachable, @@ -23831,10 +23838,12 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void { const dst_rc = self.regSetForType(dst_ty); const src_rc = self.regSetForType(src_ty); - const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + const src_lock = if (src_mcv.getReg()) |src_reg| self.register_manager.lockReg(src_reg) else null; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mcv = if (dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and + const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.class() == .general_purpose else true) and + dst_rc.supersetOf(src_rc) and dst_ty.abiSize(zcu) <= src_ty.abiSize(zcu) and + dst_ty.abiAlignment(zcu).order(src_ty.abiAlignment(zcu)).compare(.lte) and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { const dst_mcv = try self.allocRegOrMem(inst, true); try self.genCopy(switch (std.math.order(dst_ty.abiSize(zcu), src_ty.abiSize(zcu))) { @@ -27702,7 +27711,7 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { reg else unreachable, - .x87 => if (size_bytes == 16) + .x87 => if (size_bytes >= 10 and size_bytes <= 16) reg else unreachable, @@ -28574,23 +28583,19 @@ const Temp = struct { try ptr.tracking(cg).short.deref().mem(cg, .{ .size = .byte }), ); }, - .register => |val_reg| try ptr.storeReg(val_ty, registerAlias( + .register => |val_reg| try ptr.storeRegs(val_ty, &.{registerAlias( val_reg, @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), + )}, cg), inline .register_pair, .register_triple, .register_quadruple, - => |val_regs| for (val_regs) |val_reg| { - try ptr.storeReg(val_ty, val_reg, cg); - try ptr.toOffset(@divExact(val_reg.bitSize(), 8), cg); - while (try ptr.toLea(cg)) {} - }, + => |val_regs| try ptr.storeRegs(val_ty, &val_regs, cg), .register_offset => |val_reg_off| switch (val_reg_off.off) { - 0 => try ptr.storeReg(val_ty, registerAlias( + 0 => try ptr.storeRegs(val_ty, &.{registerAlias( val_reg_off.reg, @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), + )}, cg), else => continue :val_to_gpr, }, .register_overflow => |val_reg_ov| { @@ -28608,7 +28613,7 @@ const Temp = struct { else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }), }); const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu)); - try ptr.storeReg(first_ty, registerAlias(val_reg_ov.reg, first_size), cg); + try ptr.storeRegs(first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg); try ptr.toOffset(first_size, cg); try cg.asmSetccMemory( val_reg_ov.eflags, @@ -28675,6 +28680,15 @@ const Temp = struct { const val_mcv = 
val.tracking(cg).short; switch (val_mcv) { else => |mcv| std.debug.panic("{s}: {}\n", .{ @src().fn_name, mcv }), + .undef => if (opts.safe) { + var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address().offset(opts.disp)); + var pat = try cg.tempInit(.u8, .{ .immediate = 0xaa }); + var len = try cg.tempInit(.usize, .{ .immediate = val_ty.abiSize(cg.pt.zcu) }); + try dst_ptr.memset(&pat, &len, cg); + try dst_ptr.die(cg); + try pat.die(cg); + try len.die(cg); + }, .immediate => |val_imm| { const val_op: Immediate = if (std.math.cast(u31, val_imm)) |val_uimm31| .u(val_uimm31) @@ -28691,24 +28705,52 @@ const Temp = struct { val_op, ); }, - .register => |val_reg| try dst.writeReg(opts.disp, val_ty, registerAlias( + .eflags => |cc| try cg.asmSetccMemory( + cc, + try dst.tracking(cg).short.mem(cg, .{ + .size = .byte, + .disp = opts.disp, + }), + ), + .register => |val_reg| try dst.writeRegs(opts.disp, val_ty, &.{registerAlias( val_reg, @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), - inline .register_pair, .register_triple, .register_quadruple => |val_regs| { - var disp = opts.disp; - for (val_regs) |val_reg| { - try dst.writeReg(disp, val_ty, val_reg, cg); - disp += @divExact(val_reg.bitSize(), 8); - } - }, + )}, cg), + inline .register_pair, + .register_triple, + .register_quadruple, + => |val_regs| try dst.writeRegs(opts.disp, val_ty, &val_regs, cg), .register_offset => |val_reg_off| switch (val_reg_off.off) { - 0 => try dst.writeReg(opts.disp, val_ty, registerAlias( + 0 => try dst.writeRegs(opts.disp, val_ty, &.{registerAlias( val_reg_off.reg, @intCast(val_ty.abiSize(cg.pt.zcu)), - ), cg), + )}, cg), else => continue :val_to_gpr, }, + .register_overflow => |val_reg_ov| { + const ip = &cg.pt.zcu.intern_pool; + const first_ty: Type = .fromInterned(first_ty: switch (ip.indexToKey(val_ty.toIntern())) { + .tuple_type => |tuple_type| { + const tuple_field_types = tuple_type.types.get(ip); + assert(tuple_field_types.len == 2 and tuple_field_types[1] == .u1_type); + break :first_ty tuple_field_types[0]; + }, + .opt_type => |opt_child| { + assert(!val_ty.optionalReprIsPayload(cg.pt.zcu)); + break :first_ty opt_child; + }, + else => std.debug.panic("{s}: {}\n", .{ @src().fn_name, val_ty.fmt(cg.pt) }), + }); + const first_size: u31 = @intCast(first_ty.abiSize(cg.pt.zcu)); + try dst.writeRegs(opts.disp, first_ty, &.{registerAlias(val_reg_ov.reg, first_size)}, cg); + try cg.asmSetccMemory( + val_reg_ov.eflags, + try dst.tracking(cg).short.mem(cg, .{ + .size = .byte, + .disp = opts.disp + first_size, + }), + ); + }, .lea_frame, .lea_symbol => continue :val_to_gpr, .memory, .indirect, .load_frame, .load_symbol => { var dst_ptr = @@ -28739,33 +28781,47 @@ const Temp = struct { })); } - fn storeReg(ptr: *Temp, src_ty: Type, src_reg: Register, cg: *CodeGen) !void { - const src_rc = src_reg.class(); - const src_abi_size = src_ty.abiSize(cg.pt.zcu); - const strat = try cg.moveStrategy(src_ty, src_rc, false); - // hack around linker relocation bugs - switch (ptr.tracking(cg).short) { - else => {}, - .lea_symbol => |sym_off| if (src_rc != .general_purpose or sym_off.off != 0) - while (try ptr.toRegClass(false, .general_purpose, cg)) {}, - } - if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) { - try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{ - .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())), - }), src_reg); - } else { - const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu); - const frame_index = try cg.allocFrameIndex(frame_alloc); - const frame_size: 
Memory.Size = .fromSize(frame_alloc.abi_size); - try strat.write(cg, .{ - .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ .size = frame_size } }, - }, src_reg); - var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); - var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); - try ptr.memcpy(&src_ptr, &len, cg); - try src_ptr.die(cg); - try len.die(cg); + fn storeRegs(ptr: *Temp, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void { + var part_disp: u31 = 0; + var deferred_disp: u31 = 0; + var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu)); + for (src_regs) |src_reg| { + const src_rc = src_reg.class(); + const part_bit_size = @min(8 * src_abi_size, src_reg.bitSize()); + const part_size = @divExact(part_bit_size, 8); + if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) { + // hack around linker relocation bugs + switch (ptr.tracking(cg).short) { + else => {}, + .lea_symbol => while (try ptr.toRegClass(false, .general_purpose, cg)) {}, + } + const strat = try cg.moveStrategy(src_ty, src_rc, false); + try strat.write(cg, try ptr.tracking(cg).short.deref().mem(cg, .{ + .size = .fromBitSize(part_bit_size), + .disp = part_disp, + }), registerAlias(src_reg, part_size)); + } else { + const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size); + const frame_index = try cg.allocFrameIndex(.init(.{ + .size = frame_size, + .alignment = .fromNonzeroByteUnits(frame_size), + })); + const strat = try cg.moveStrategy(src_ty, src_rc, true); + try strat.write(cg, .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .fromSize(frame_size) } }, + }, registerAlias(src_reg, frame_size)); + try ptr.toOffset(deferred_disp, cg); + deferred_disp = 0; + var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); + var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); + try ptr.memcpy(&src_ptr, &len, cg); + try src_ptr.die(cg); + try len.die(cg); + } + part_disp += part_size; + deferred_disp += part_size; + src_abi_size -= part_size; } } @@ -28777,30 +28833,41 @@ const Temp = struct { })); } - fn writeReg(dst: Temp, disp: i32, src_ty: Type, src_reg: Register, cg: *CodeGen) !void { - const src_rc = src_reg.class(); - const src_abi_size = src_ty.abiSize(cg.pt.zcu); - const strat = try cg.moveStrategy(src_ty, src_rc, false); - if (src_rc == .x87 or std.math.isPowerOfTwo(src_abi_size)) { - try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{ - .size = .fromBitSize(@min(8 * src_abi_size, src_reg.bitSize())), - .disp = disp, - }), src_reg); - } else { - const frame_alloc: FrameAlloc = .initSpill(src_ty, cg.pt.zcu); - const frame_index = try cg.allocFrameIndex(frame_alloc); - const frame_size: Memory.Size = .fromSize(frame_alloc.abi_size); - try strat.write(cg, .{ - .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ .size = frame_size } }, - }, src_reg); - var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address()); - var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); - var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); - try dst_ptr.memcpy(&src_ptr, &len, cg); - try dst_ptr.die(cg); - try src_ptr.die(cg); - try len.die(cg); + fn writeRegs(dst: Temp, disp: i32, src_ty: Type, src_regs: []const Register, cg: *CodeGen) !void { + var part_disp = disp; + var src_abi_size: u32 = @intCast(src_ty.abiSize(cg.pt.zcu)); + for (src_regs) |src_reg| { + const src_rc = src_reg.class(); + const part_bit_size = @min(8 * src_abi_size, 
src_reg.bitSize()); + const part_size = @divExact(part_bit_size, 8); + if (src_rc == .x87 or std.math.isPowerOfTwo(part_size)) { + const strat = try cg.moveStrategy(src_ty, src_rc, false); + try strat.write(cg, try dst.tracking(cg).short.mem(cg, .{ + .size = .fromBitSize(part_bit_size), + .disp = part_disp, + }), registerAlias(src_reg, part_size)); + } else { + const frame_size = std.math.ceilPowerOfTwoAssert(u32, part_size); + const frame_index = try cg.allocFrameIndex(.init(.{ + .size = frame_size, + .alignment = .fromNonzeroByteUnits(frame_size), + })); + const strat = try cg.moveStrategy(src_ty, src_rc, true); + try strat.write(cg, .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .fromSize(frame_size) } }, + }, registerAlias(src_reg, frame_size)); + var dst_ptr = try cg.tempInit(.usize, dst.tracking(cg).short.address()); + try dst_ptr.toOffset(part_disp, cg); + var src_ptr = try cg.tempInit(.usize, .{ .lea_frame = .{ .index = frame_index } }); + var len = try cg.tempInit(.usize, .{ .immediate = src_abi_size }); + try dst_ptr.memcpy(&src_ptr, &len, cg); + try dst_ptr.die(cg); + try src_ptr.die(cg); + try len.die(cg); + } + part_disp += part_size; + src_abi_size -= part_size; } } @@ -29123,8 +29190,8 @@ const Select = struct { signed_int_vec: Memory.Size, signed_int_or_full_vec: Memory.Size, unsigned_int_vec: Memory.Size, - int_or_vec: Memory.Size, - exact_remainder_int_or_vec: struct { of: Memory.Size, is: Memory.Size }, + size: Memory.Size, + multiple_size: Memory.Size, int: Memory.Size, scalar_int: Memory.Size, scalar_signed_int: Memory.Size, @@ -29170,15 +29237,8 @@ const Select = struct { } else false, .unsigned_int_vec => |size| ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu) and if (intInfo(ty.childType(zcu), cg)) |int_info| int_info.signedness == .unsigned else false, - .int_or_vec => |size| if (intInfo(ty, cg)) |int_info| - size.bitSize(cg.target) >= int_info.bits - else - ty.isVector(zcu) and size.bitSize(cg.target) >= 8 * ty.abiSize(zcu), - .exact_remainder_int_or_vec => |of_is| if (intInfo(ty, cg)) |int_info| - of_is.is.bitSize(cg.target) == (int_info.bits - 1) % of_is.of.bitSize(cg.target) + 1 - else - ty.isVector(zcu) and ty.childType(zcu).toIntern() != .bool_type and - of_is.is.bitSize(cg.target) == (8 * ty.abiSize(zcu) - 1) % of_is.of.bitSize(cg.target) + 1, + .size => |size| size.bitSize(cg.target) >= 8 * ty.abiSize(zcu), + .multiple_size => |size| size.bitSize(cg.target) % 8 * ty.abiSize(zcu) == 0, .int => |size| if (intInfo(ty, cg)) |int_info| size.bitSize(cg.target) >= int_info.bits else false, .scalar_int => |size| if (intInfo(ty.scalarType(zcu), cg)) |int_info| size.bitSize(cg.target) >= int_info.bits diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index 750ea99706..0be0833ff0 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -540,8 +540,12 @@ pub fn getCAbiSseReturnRegs(cc: std.builtin.CallingConvention.Tag) []const Regis } pub fn getCAbiLinkerScratchReg(cc: std.builtin.CallingConvention.Tag) Register { - const int_return_regs = getCAbiIntReturnRegs(cc); - return int_return_regs[int_return_regs.len - 1]; + return switch (cc) { + .auto => zigcc.int_return_regs[zigcc.int_return_regs.len - 1], + .x86_64_sysv => SysV.c_abi_int_return_regs[0], + .x86_64_win => Win64.c_abi_int_return_regs[0], + else => unreachable, + }; } const gp_regs = [_]Register{ diff --git a/src/main.zig b/src/main.zig index b17a753b2b..7d035ab135 100644 --- a/src/main.zig +++ b/src/main.zig @@ -39,6 +39,11 @@ test { _ = 
Package; } +const thread_stack_size = switch (builtin.zig_backend) { + else => std.Thread.SpawnConfig.default_stack_size, + .stage2_x86_64 => 32 << 20, +}; + pub const std_options: std.Options = .{ .wasiCwd = wasi_cwd, .logFn = log, @@ -3320,6 +3325,7 @@ fn buildOutputType( .allocator = gpa, .n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)), .track_ids = true, + .stack_size = thread_stack_size, }); defer thread_pool.deinit(); @@ -5024,6 +5030,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { .allocator = gpa, .n_jobs = @min(@max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)), .track_ids = true, + .stack_size = thread_stack_size, }); defer thread_pool.deinit(); @@ -5460,6 +5467,7 @@ fn jitCmd( .allocator = gpa, .n_jobs = @min(@max(std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking)), .track_ids = true, + .stack_size = thread_stack_size, }); defer thread_pool.deinit(); diff --git a/src/register_manager.zig b/src/register_manager.zig index b9d3b6db0b..5621c8f750 100644 --- a/src/register_manager.zig +++ b/src/register_manager.zig @@ -58,11 +58,6 @@ pub fn RegisterManager( return @alignCast(@fieldParentPtr("register_manager", self)); } - fn excludeRegister(reg: Register, register_class: RegisterBitSet) bool { - const index = indexOfRegIntoTracked(reg) orelse return true; - return !register_class.isSet(index); - } - fn markRegIndexAllocated(self: *Self, tracked_index: TrackedIndex) void { self.allocated_registers.set(tracked_index); } @@ -234,28 +229,20 @@ pub fn RegisterManager( ) ?[count]Register { comptime assert(count > 0 and count <= tracked_registers.len); - var free_and_not_locked_registers = self.free_registers; - free_and_not_locked_registers.setIntersection(register_class); - - var unlocked_registers = self.locked_registers; - unlocked_registers.toggleAll(); - - free_and_not_locked_registers.setIntersection(unlocked_registers); - - if (free_and_not_locked_registers.count() < count) return null; + var free_and_unlocked_registers = self.locked_registers; + free_and_unlocked_registers.toggleAll(); + free_and_unlocked_registers.setIntersection(self.free_registers); + free_and_unlocked_registers.setIntersection(register_class); var regs: [count]Register = undefined; var i: usize = 0; - for (tracked_registers) |reg| { - if (i >= count) break; - if (excludeRegister(reg, register_class)) continue; - if (self.isRegLocked(reg)) continue; - if (!self.isRegFree(reg)) continue; - - regs[i] = reg; + var it = free_and_unlocked_registers.iterator(.{}); + while (it.next()) |reg_index| { + regs[i] = regAtTrackedIndex(@intCast(reg_index)); i += 1; + if (i >= count) break; } - assert(i == count); + if (i < count) return null; for (regs, insts) |reg, inst| { log.debug("tryAllocReg {} for inst {?}", .{ reg, inst }); @@ -290,46 +277,27 @@ pub fn RegisterManager( ) AllocationError![count]Register { comptime assert(count > 0 and count <= tracked_registers.len); - var locked_registers = self.locked_registers; - locked_registers.setIntersection(register_class); - - if (count > register_class.count() - locked_registers.count()) return error.OutOfRegisters; - const result = self.tryAllocRegs(count, insts, register_class) orelse blk: { + var unlocked_registers = self.locked_registers; + unlocked_registers.toggleAll(); + unlocked_registers.setIntersection(register_class); + // We'll take over the first count registers. 
Spill // the instructions that were previously there to a // stack allocations. var regs: [count]Register = undefined; var i: usize = 0; - for (tracked_registers) |reg| { - if (i >= count) break; - if (excludeRegister(reg, register_class)) break; - if (self.isRegLocked(reg)) continue; - - log.debug("allocReg {} for inst {?}", .{ reg, insts[i] }); - regs[i] = reg; - self.markRegAllocated(reg); - const index = indexOfRegIntoTracked(reg).?; // indexOfReg() on a callee-preserved reg should never return null - if (insts[i]) |inst| { - // Track the register - if (self.isRegFree(reg)) { - self.markRegUsed(reg); - } else { - const spilled_inst = self.registers[index]; - try self.getFunction().spillInstruction(reg, spilled_inst); - } - self.registers[index] = inst; - } else { - // Don't track the register - if (!self.isRegFree(reg)) { - const spilled_inst = self.registers[index]; - try self.getFunction().spillInstruction(reg, spilled_inst); - self.freeReg(reg); - } - } - + var it = unlocked_registers.iterator(.{}); + while (it.next()) |reg_index| { + const tracked_index: TrackedIndex = @intCast(reg_index); + if (!self.isRegIndexFree(tracked_index) and + self.registers[tracked_index].unwrap() == .target) continue; + try self.getRegIndex(tracked_index, insts[i]); + regs[i] = regAtTrackedIndex(tracked_index); i += 1; + if (i >= count) break; } + if (i < count) return error.OutOfRegisters; break :blk regs; }; @@ -351,7 +319,7 @@ pub fn RegisterManager( /// Spills the register if it is currently allocated. If a /// corresponding instruction is passed, will also track this /// register. - fn getRegIndex( + pub fn getRegIndex( self: *Self, tracked_index: TrackedIndex, inst: ?Air.Inst.Index, diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index 5bb257a5f4..6e36473b17 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -742,6 +742,16 @@ fn testBinary(comptime op: anytype) !void { 0xb7935f5c2f3b1ae7a422c0a7c446884294b7d5370bada307d2fe5a4c4284a999, 0x310e6e196ba4f143b8d285ca6addf7f3bb3344224aff221b27607a31e148be08, ); + try testType( + u258, + 0x186d5ddaab8cb8cb04e5b41e36f812e039d008baf49f12894c39e29a07796d800, + 0x2072daba6ffad168826163eb136f6d28ca4360c8e7e5e41e29755e19e4753a4f5, + ); + try testType( + u495, + 0x6eaf4e252b3bf74b75bac59e0b43ca5326bad2a25b3fdb74a67ef132ac5e47d72eebc3316fb2351ee66c50dc5afb92a75cea9b0e35160652c7db39eeb158, + 0x49fbed744a92b549d8c05bb3512c617d24dd824f3f69bdf3923bc326a75674b85f5b828d2566fab9c86f571d12c2a63c9164feb0d191d27905533d09622a, + ); try testType( u512, 0xe5b1fedca3c77db765e517aabd05ffc524a3a8aff1784bbf67c45b894447ede32b65b9940e78173c591e56e078932d465f235aece7ad47b7f229df7ba8f12295,