From 6d3c7bd4362b8f2415bc1b0d7fee6b30668bdfcd Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Thu, 17 Feb 2022 16:38:05 +0100
Subject: [PATCH] x64: pass all args on stack in debug and if not extern fn

If the called function is not extern and we are compiling in Debug mode,
always pass function params on the stack. This improves the debugging
experience since the params no longer live in volatile registers that the
body of the procedure may reuse and clobber.
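
As an illustration of the failure mode this avoids (the function below is
hypothetical and not part of this patch): when an arg arrives in a
register, codegen may later reuse that register for a local operation,
destroying the only copy of the arg, so inspecting it in a debugger past
that point shows garbage. A dedicated stack slot keeps the arg readable
for the whole procedure:

    // Hypothetical Zig function sketching the problem when args
    // are passed in registers in a Debug build:
    fn addSquares(x: u64, y: u64) u64 {
        const xx = x * x; // codegen may reuse x's register for xx,
                          // clobbering the only copy of `x`
        const yy = y * y;
        return xx + yy; // with register-passed args, a breakpoint here
                        // may show garbage for `x`; a stack slot does not
    }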

Finish the implementation of `imul_complex`.
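
With this change `mirIMulComplex` covers all four `flags` variants: 0b00
multiplies register by register, 0b01 register by memory, 0b10 register by
register with an immediate, and 0b11 register by memory with an immediate.
A sketch of the new 0b01 form as `genIMulOpMir` emits it below (the
destination register and stack offset are invented for illustration):

    // Multiply rax by the qword at [rbp - 8], i.e. roughly:
    //     imul rax, qword ptr [rbp - 8]
    _ = try self.addInst(.{
        .tag = .imul_complex,
        .ops = (Mir.Ops{
            .reg1 = .rax, // destination register
            .reg2 = .rbp, // base register of the memory operand
            .flags = 0b01, // selects the register-memory form
        }).encode(),
        // the negative stack offset becomes the bit-cast u32 displacement
        .data = .{ .imm = @bitCast(u32, @as(i32, -8)) },
    });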
---
 src/arch/x86_64/CodeGen.zig | 106 ++++++++++++++++++++++++++----------
 src/arch/x86_64/Emit.zig    |  17 +++++-
 2 files changed, 92 insertions(+), 31 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index aba98276c3..9e8866e1e1 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -1737,6 +1737,10 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
         .immediate => |imm| {
             try self.setRegOrMem(elem_ty, dst_mcv, .{ .memory = imm });
         },
+        .stack_offset => {
+            const reg = try self.copyToTmpRegister(ptr_ty, ptr);
+            try self.load(dst_mcv, .{ .register = reg }, ptr_ty);
+        },
         .ptr_stack_offset => |off| {
             try self.setRegOrMem(elem_ty, dst_mcv, .{ .stack_offset = off });
         },
@@ -1787,9 +1791,6 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
             const reg = try self.copyToTmpRegister(ptr_ty, ptr);
             try self.load(dst_mcv, .{ .register = reg }, ptr_ty);
         },
-        .stack_offset => {
-            return self.fail("TODO implement loading from MCValue.stack_offset", .{});
-        },
     }
 }

@@ -1832,6 +1833,10 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
         .immediate => |imm| {
             try self.setRegOrMem(value_ty, .{ .memory = imm }, value);
         },
+        .stack_offset => {
+            const reg = try self.copyToTmpRegister(ptr_ty, ptr);
+            try self.store(.{ .register = reg }, value, ptr_ty, value_ty);
+        },
         .ptr_stack_offset => |off| {
             try self.genSetStack(value_ty, off, value);
         },
@@ -1909,6 +1914,10 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                     .data = .{ .imm = 0 },
                 });
             },
+            .stack_offset => {
+                const tmp_reg = try self.copyToTmpRegister(value_ty, value);
+                return self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
+            },
             else => |other| {
                 return self.fail("TODO implement set pointee with {}", .{other});
             },
@@ -2020,9 +2029,6 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                 else => return self.fail("TODO implement storing {} to MCValue.memory", .{value}),
             }
         },
-        .stack_offset => {
-            return self.fail("TODO implement storing to MCValue.stack_offset", .{});
-        },
     }
 }

@@ -2452,7 +2458,18 @@ fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !
                     return self.genIMulOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
                 }
             },
-            .embedded_in_code, .memory, .stack_offset => {
+            .stack_offset => |off| {
+                _ = try self.addInst(.{
+                    .tag = .imul_complex,
+                    .ops = (Mir.Ops{
+                        .reg1 = dst_reg,
+                        .reg2 = .rbp,
+                        .flags = 0b01,
+                    }).encode(),
+                    .data = .{ .imm = @bitCast(u32, -off) },
+                });
+            },
+            .embedded_in_code, .memory => {
                 return self.fail("TODO implement x86 multiply source memory", .{});
             },
             .got_load, .direct_load => {
@@ -3520,6 +3537,13 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
         .dead => unreachable,
         .ptr_embedded_in_code => unreachable,
         .unreach, .none => return,
+        .undef => {
+            if (abi_size <= 8) {
+                const reg = try self.copyToTmpRegister(ty, mcv);
+                return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
+            }
+            try self.genInlineMemset(stack_offset, .rsp, ty, .{ .immediate = 0xaa });
+        },
         .compare_flags_unsigned,
         .compare_flags_signed,
         => {
@@ -3598,7 +3622,6 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE

             try self.genInlineMemcpy(stack_offset, .rsp, ty, mcv);
         },
-        else => return self.fail("TODO implement args on stack for {}", .{mcv}),
     }
 }

@@ -3617,7 +3640,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerErro
                 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }),
                 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
                 8 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
-                else => return self.genInlineMemset(ty, stack_offset, .{ .immediate = 0xaa }),
+                else => return self.genInlineMemset(stack_offset, .rbp, ty, .{ .immediate = 0xaa }),
             }
         },
         .compare_flags_unsigned,
@@ -3943,12 +3966,20 @@ fn genInlineMemcpy(self: *Self, stack_offset: i32, stack_reg: Register, ty: Type
     try self.performReloc(loop_reloc);
 }

-fn genInlineMemset(self: *Self, ty: Type, stack_offset: i32, value: MCValue) InnerError!void {
+fn genInlineMemset(
+    self: *Self,
+    stack_offset: i32,
+    stack_register: Register,
+    ty: Type,
+    value: MCValue,
+) InnerError!void {
     try self.register_manager.getReg(.rax, null);
+    const abi_size = ty.abiSize(self.target.*);

     if (stack_offset > 128) {
         return self.fail("TODO inline memset with large stack offset", .{});
     }
+
     const negative_offset = @bitCast(u32, -stack_offset);

     // We are actually counting `abi_size` bytes; however, we reuse the index register
@@ -4005,7 +4036,7 @@ fn genInlineMemset(self: *Self, ty: Type, stack_offset: i32, value: MCValue) Inn
         _ = try self.addInst(.{
             .tag = .mov_mem_index_imm,
             .ops = (Mir.Ops{
-                .reg1 = .rbp,
+                .reg1 = stack_register.to64(),
             }).encode(),
             .data = .{ .payload = payload },
         });
@@ -4731,20 +4762,40 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
             var next_int_reg: usize = 0;
             var by_reg = std.AutoHashMap(usize, usize).init(self.bin_file.allocator);
             defer by_reg.deinit();
-            for (param_types) |ty, i| {
-                if (!ty.hasRuntimeBits()) continue;
-                const param_size = @intCast(u32, ty.abiSize(self.target.*));
-                const pass_in_reg = switch (ty.zigTypeTag()) {
-                    .Bool => true,
-                    .Int, .Enum => param_size <= 8,
-                    .Pointer => ty.ptrSize() != .Slice,
-                    .Optional => ty.isPtrLikeOptional(),
-                    else => false,
-                };
-                if (pass_in_reg) {
-                    if (next_int_reg >= c_abi_int_param_regs.len) break;
-                    try by_reg.putNoClobber(i, next_int_reg);
-                    next_int_reg += 1;
+
+            // If we want debug output, we store all args on stack for better liveness of args
+            // in debugging contexts such as previewing the args in the debugger anywhere in
+            // the procedure. Passing the args via registers can lead to reusing the register
+            // for local ops thus clobbering the input arg forever.
+            // This of course excludes C ABI calls.
+            const omit_args_in_registers = blk: {
+                if (cc == .C) break :blk false;
+                switch (self.bin_file.options.optimize_mode) {
+                    .Debug => break :blk true,
+                    else => break :blk false,
+                }
+            };
+            if (!omit_args_in_registers) {
+                for (param_types) |ty, i| {
+                    if (!ty.hasRuntimeBits()) continue;
+                    const param_size = @intCast(u32, ty.abiSize(self.target.*));
+                    // For simplicity of codegen, slices and other types are always pushed onto the stack.
+                    // TODO: look into optimizing this by passing things as registers sometimes,
+                    // such as ptr and len of slices as separate registers.
+                    // TODO: also we need to honor the C ABI for relevant types rather than passing on
+                    // the stack here.
+                    const pass_in_reg = switch (ty.zigTypeTag()) {
+                        .Bool => true,
+                        .Int, .Enum => param_size <= 8,
+                        .Pointer => ty.ptrSize() != .Slice,
+                        .Optional => ty.isPtrLikeOptional(),
+                        else => false,
+                    };
+                    if (pass_in_reg) {
+                        if (next_int_reg >= c_abi_int_param_regs.len) break;
+                        try by_reg.putNoClobber(i, next_int_reg);
+                        next_int_reg += 1;
+                    }
                 }
             }

@@ -4765,11 +4816,6 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
                     result.args[i] = .{ .register = aliased_reg };
                     next_int_reg += 1;
                 } else {
-                    // For simplicity of codegen, slices and other types are always pushed onto the stack.
-                    // TODO: look into optimizing this by passing things as registers sometimes,
-                    // such as ptr and len of slices as separate registers.
-                    // TODO: also we need to honor the C ABI for relevant types rather than passing on
-                    // the stack here.
                     const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align);
                     result.args[i] = .{ .stack_offset = @intCast(i32, offset) };
                     next_stack_offset = offset;
diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig
index 32877cb6f8..5ad3508aa2 100644
--- a/src/arch/x86_64/Emit.zig
+++ b/src/arch/x86_64/Emit.zig
@@ -691,11 +691,26 @@ fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
         0b00 => {
             return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code);
         },
+        0b01 => {
+            const imm = emit.mir.instructions.items(.data)[inst].imm;
+            const src_reg: ?Register = if (ops.reg2 == .none) null else ops.reg2;
+            return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.mem(.qword_ptr, .{
+                .disp = imm,
+                .base = src_reg,
+            }), emit.code);
+        },
         0b10 => {
             const imm = emit.mir.instructions.items(.data)[inst].imm;
             return lowerToRmiEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), imm, emit.code);
         },
-        else => return emit.fail("TODO implement imul", .{}),
+        0b11 => {
+            const payload = emit.mir.instructions.items(.data)[inst].payload;
+            const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data;
+            return lowerToRmiEnc(.imul, ops.reg1, RegisterOrMemory.mem(.qword_ptr, .{
+                .disp = imm_pair.dest_off,
+                .base = ops.reg2,
+            }), imm_pair.operand, emit.code);
+        },
     }
 }