From 6d3c7bd4362b8f2415bc1b0d7fee6b30668bdfcd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 17 Feb 2022 16:38:05 +0100 Subject: [PATCH 1/7] x64: pass all args on stack in debug and if not extern fn If the called function is not extern and we are compiling in debug mode, always pass the function params on the stack. This improves the debugging experience since the params will not be held in volatile registers that the procedure body may clobber. Also finish the implementation of `imul_complex`. --- src/arch/x86_64/CodeGen.zig | 106 ++++++++++++++++++++++++++---------- src/arch/x86_64/Emit.zig | 17 +++++- 2 files changed, 92 insertions(+), 31 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index aba98276c3..9e8866e1e1 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1737,6 +1737,10 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo .immediate => |imm| { try self.setRegOrMem(elem_ty, dst_mcv, .{ .memory = imm }); }, + .stack_offset => { + const reg = try self.copyToTmpRegister(ptr_ty, ptr); + try self.load(dst_mcv, .{ .register = reg }, ptr_ty); + }, .ptr_stack_offset => |off| { try self.setRegOrMem(elem_ty, dst_mcv, .{ .stack_offset = off }); }, @@ -1787,9 +1791,6 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo const reg = try self.copyToTmpRegister(ptr_ty, ptr); try self.load(dst_mcv, .{ .register = reg }, ptr_ty); }, - .stack_offset => { - return self.fail("TODO implement loading from MCValue.stack_offset", .{}); - }, } } @@ -1832,6 +1833,10 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type .immediate => |imm| { try self.setRegOrMem(value_ty, .{ .memory = imm }, value); }, + .stack_offset => { + const reg = try self.copyToTmpRegister(ptr_ty, ptr); + try self.store(.{ .register = reg }, value, ptr_ty, value_ty); + }, .ptr_stack_offset => |off| { try self.genSetStack(value_ty, off, value); }, @@ -1909,6 +1914,10 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type .data = .{ .imm = 0 }, }); }, + .stack_offset => { + const tmp_reg = try self.copyToTmpRegister(value_ty, value); + return self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty); + }, else => |other| { return self.fail("TODO implement set pointee with {}", .{other}); }, @@ -2020,9 +2029,6 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type else => return self.fail("TODO implement storing {} to MCValue.memory", .{value}), } }, - .stack_offset => { - return self.fail("TODO implement storing to MCValue.stack_offset", .{}); - }, } } @@ -2452,7 +2458,18 @@ fn genIMulOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !
return self.genIMulOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); } }, - .embedded_in_code, .memory, .stack_offset => { + .stack_offset => |off| { + _ = try self.addInst(.{ + .tag = .imul_complex, + .ops = (Mir.Ops{ + .reg1 = dst_reg, + .reg2 = .rbp, + .flags = 0b01, + }).encode(), + .data = .{ .imm = @bitCast(u32, -off) }, + }); + }, + .embedded_in_code, .memory => { return self.fail("TODO implement x86 multiply source memory", .{}); }, .got_load, .direct_load => { @@ -3520,6 +3537,13 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE .dead => unreachable, .ptr_embedded_in_code => unreachable, .unreach, .none => return, + .undef => { + if (abi_size <= 8) { + const reg = try self.copyToTmpRegister(ty, mcv); + return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg }); + } + try self.genInlineMemset(stack_offset, .rsp, ty, .{ .immediate = 0xaa }); + }, .compare_flags_unsigned, .compare_flags_signed, => { @@ -3598,7 +3622,6 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE try self.genInlineMemcpy(stack_offset, .rsp, ty, mcv); }, - else => return self.fail("TODO implement args on stack for {}", .{mcv}), } } @@ -3617,7 +3640,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerErro 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }), 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }), 8 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), - else => return self.genInlineMemset(ty, stack_offset, .{ .immediate = 0xaa }), + else => return self.genInlineMemset(stack_offset, .rbp, ty, .{ .immediate = 0xaa }), } }, .compare_flags_unsigned, @@ -3943,12 +3966,20 @@ fn genInlineMemcpy(self: *Self, stack_offset: i32, stack_reg: Register, ty: Type try self.performReloc(loop_reloc); } -fn genInlineMemset(self: *Self, ty: Type, stack_offset: i32, value: MCValue) InnerError!void { +fn genInlineMemset( + self: *Self, + stack_offset: i32, + stack_register: Register, + ty: Type, + value: MCValue, +) InnerError!void { try self.register_manager.getReg(.rax, null); + const abi_size = ty.abiSize(self.target.*); if (stack_offset > 128) { return self.fail("TODO inline memset with large stack offset", .{}); } + const negative_offset = @bitCast(u32, -stack_offset); // We are actually counting `abi_size` bytes; however, we reuse the index register @@ -4005,7 +4036,7 @@ fn genInlineMemset(self: *Self, ty: Type, stack_offset: i32, value: MCValue) Inn _ = try self.addInst(.{ .tag = .mov_mem_index_imm, .ops = (Mir.Ops{ - .reg1 = .rbp, + .reg1 = stack_register.to64(), }).encode(), .data = .{ .payload = payload }, }); @@ -4731,20 +4762,40 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { var next_int_reg: usize = 0; var by_reg = std.AutoHashMap(usize, usize).init(self.bin_file.allocator); defer by_reg.deinit(); - for (param_types) |ty, i| { - if (!ty.hasRuntimeBits()) continue; - const param_size = @intCast(u32, ty.abiSize(self.target.*)); - const pass_in_reg = switch (ty.zigTypeTag()) { - .Bool => true, - .Int, .Enum => param_size <= 8, - .Pointer => ty.ptrSize() != .Slice, - .Optional => ty.isPtrLikeOptional(), - else => false, - }; - if (pass_in_reg) { - if (next_int_reg >= c_abi_int_param_regs.len) break; - try by_reg.putNoClobber(i, next_int_reg); - next_int_reg += 1; + + // If we want debug output, we store all args on stack for better liveness of args + // in debugging contexts such as previewing 
the args in the debugger anywhere in + // the procedure. Passing the args via registers can lead to reusing the register + // for local ops thus clobbering the input arg forever. + // This of course excludes C ABI calls. + const omit_args_in_registers = blk: { + if (cc == .C) break :blk false; + switch (self.bin_file.options.optimize_mode) { + .Debug => break :blk true, + else => break :blk false, + } + }; + if (!omit_args_in_registers) { + for (param_types) |ty, i| { + if (!ty.hasRuntimeBits()) continue; + const param_size = @intCast(u32, ty.abiSize(self.target.*)); + // For simplicity of codegen, slices and other types are always pushed onto the stack. + // TODO: look into optimizing this by passing things as registers sometimes, + // such as ptr and len of slices as separate registers. + // TODO: also we need to honor the C ABI for relevant types rather than passing on + // the stack here. + const pass_in_reg = switch (ty.zigTypeTag()) { + .Bool => true, + .Int, .Enum => param_size <= 8, + .Pointer => ty.ptrSize() != .Slice, + .Optional => ty.isPtrLikeOptional(), + else => false, + }; + if (pass_in_reg) { + if (next_int_reg >= c_abi_int_param_regs.len) break; + try by_reg.putNoClobber(i, next_int_reg); + next_int_reg += 1; + } } } @@ -4765,11 +4816,6 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.args[i] = .{ .register = aliased_reg }; next_int_reg += 1; } else { - // For simplicity of codegen, slices and other types are always pushed onto the stack. - // TODO: look into optimizing this by passing things as registers sometimes, - // such as ptr and len of slices as separate registers. - // TODO: also we need to honor the C ABI for relevant types rather than passing on - // the stack here. const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); result.args[i] = .{ .stack_offset = @intCast(i32, offset) }; next_stack_offset = offset; diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 32877cb6f8..5ad3508aa2 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -691,11 +691,26 @@ fn mirIMulComplex(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { 0b00 => { return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), emit.code); }, + 0b01 => { + const imm = emit.mir.instructions.items(.data)[inst].imm; + const src_reg: ?Register = if (ops.reg2 == .none) null else ops.reg2; + return lowerToRmEnc(.imul, ops.reg1, RegisterOrMemory.mem(.qword_ptr, .{ + .disp = imm, + .base = src_reg, + }), emit.code); + }, 0b10 => { const imm = emit.mir.instructions.items(.data)[inst].imm; return lowerToRmiEnc(.imul, ops.reg1, RegisterOrMemory.reg(ops.reg2), imm, emit.code); }, - else => return emit.fail("TODO implement imul", .{}), + 0b11 => { + const payload = emit.mir.instructions.items(.data)[inst].payload; + const imm_pair = emit.mir.extraData(Mir.ImmPair, payload).data; + return lowerToRmiEnc(.imul, ops.reg1, RegisterOrMemory.mem(.qword_ptr, .{ + .disp = imm_pair.dest_off, + .base = ops.reg2, + }), imm_pair.operand, emit.code); + }, } } From 085c606b8739fac707e842069010661ed2ec8249 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 17 Feb 2022 17:32:23 +0100 Subject: [PATCH 2/7] x64: implement slice_elem_ptr --- src/arch/x86_64/CodeGen.zig | 87 ++++++++++++++++++++----------------- test/behavior/slice.zig | 3 -- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 9e8866e1e1..7b96ea4baa 100644 --- 
a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1500,52 +1500,60 @@ fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Regi return reg; } +fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { + const slice_ty = self.air.typeOf(lhs); + const slice_mcv = try self.resolveInst(lhs); + slice_mcv.freezeIfRegister(&self.register_manager); + defer slice_mcv.unfreezeIfRegister(&self.register_manager); + + const elem_ty = slice_ty.childType(); + const elem_size = elem_ty.abiSize(self.target.*); + var buf: Type.SlicePtrFieldTypeBuffer = undefined; + const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf); + + const index_ty = self.air.typeOf(rhs); + const index_mcv = try self.resolveInst(rhs); + index_mcv.freezeIfRegister(&self.register_manager); + defer index_mcv.unfreezeIfRegister(&self.register_manager); + + const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size); + self.register_manager.freezeRegs(&.{offset_reg}); + defer self.register_manager.unfreezeRegs(&.{offset_reg}); + + const addr_reg = try self.register_manager.allocReg(null); + switch (slice_mcv) { + .stack_offset => |off| { + // mov reg, [rbp - 8] + _ = try self.addInst(.{ + .tag = .mov, + .ops = (Mir.Ops{ + .reg1 = addr_reg.to64(), + .reg2 = .rbp, + .flags = 0b01, + }).encode(), + .data = .{ .imm = @bitCast(u32, -@intCast(i32, off)) }, + }); + }, + else => return self.fail("TODO implement slice_elem_ptr when slice is {}", .{slice_mcv}), + } + // TODO we could allocate register here, but need to expect addr register and potentially + // offset register. + try self.genBinMathOpMir(.add, slice_ptr_field_type, .{ .register = addr_reg.to64() }, .{ + .register = offset_reg.to64(), + }); + return MCValue{ .register = addr_reg.to64() }; +} + fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { const is_volatile = false; // TODO const bin_op = self.air.instructions.items(.data)[inst].bin_op; const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) .dead else result: { const slice_ty = self.air.typeOf(bin_op.lhs); - const slice_mcv = try self.resolveInst(bin_op.lhs); - slice_mcv.freezeIfRegister(&self.register_manager); - defer slice_mcv.unfreezeIfRegister(&self.register_manager); - - const elem_ty = slice_ty.childType(); - const elem_size = elem_ty.abiSize(self.target.*); var buf: Type.SlicePtrFieldTypeBuffer = undefined; const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf); - - const index_ty = self.air.typeOf(bin_op.rhs); - const index_mcv = try self.resolveInst(bin_op.rhs); - index_mcv.freezeIfRegister(&self.register_manager); - defer index_mcv.unfreezeIfRegister(&self.register_manager); - - const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size); - self.register_manager.freezeRegs(&.{offset_reg}); - defer self.register_manager.unfreezeRegs(&.{offset_reg}); - - const addr_reg = try self.register_manager.allocReg(null); - switch (slice_mcv) { - .stack_offset => |off| { - // mov reg, [rbp - 8] - _ = try self.addInst(.{ - .tag = .mov, - .ops = (Mir.Ops{ - .reg1 = addr_reg.to64(), - .reg2 = .rbp, - .flags = 0b01, - }).encode(), - .data = .{ .imm = @bitCast(u32, -@intCast(i32, off)) }, - }); - }, - else => return self.fail("TODO implement slice_elem_val when slice is {}", .{slice_mcv}), - } - // TODO we could allocate register here, but need to expect addr register and potentially - // offset register. 
+ const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs); const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genBinMathOpMir(.add, slice_ptr_field_type, .{ .register = addr_reg.to64() }, .{ - .register = offset_reg.to64(), - }); - try self.load(dst_mcv, .{ .register = addr_reg.to64() }, slice_ptr_field_type); + try self.load(dst_mcv, elem_ptr, slice_ptr_field_type); break :result dst_mcv; }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -1557,7 +1565,7 @@ fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else - return self.fail("TODO implement slice_elem_ptr for {}", .{self.target.cpu.arch}); + try self.genSliceElemPtr(extra.lhs, extra.rhs); return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); } @@ -1571,6 +1579,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { const elem_ty = array_ty.childType(); const elem_abi_size = elem_ty.abiSize(self.target.*); + const index_ty = self.air.typeOf(bin_op.rhs); const index = try self.resolveInst(bin_op.rhs); index.freezeIfRegister(&self.register_manager); diff --git a/test/behavior/slice.zig b/test/behavior/slice.zig index e64e82d474..08ab0edf48 100644 --- a/test/behavior/slice.zig +++ b/test/behavior/slice.zig @@ -261,7 +261,6 @@ fn sliceFromLenToLen(a_slice: []u8, start: usize, end: usize) []u8 { test "C pointer" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; var buf: [*c]const u8 = "kjdhfkjdhfdkjhfkfjhdfkjdhfkdjhfdkjhf"; var len: u32 = 10; @@ -302,7 +301,6 @@ fn sliceSum(comptime q: []const u8) i32 { test "slice type with custom alignment" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const LazilyResolvedType = struct { anything: i32, @@ -317,7 +315,6 @@ test "slice type with custom alignment" { test "obtaining a null terminated slice" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO // here we have a normal array From abfaf8382b58d8bdfa029b74205d385cbc73d78e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 17 Feb 2022 17:54:37 +0100 Subject: [PATCH 3/7] x64: implement array_elem_val when array fits in register --- src/arch/x86_64/CodeGen.zig | 46 +++++++++++++++++++++++++------------ test/behavior/array.zig | 20 +++++----------- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 7b96ea4baa..cc3953e57b 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1589,21 +1589,37 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { self.register_manager.freezeRegs(&.{offset_reg}); defer self.register_manager.unfreezeRegs(&.{offset_reg}); - const addr_reg = try self.register_manager.allocReg(null); - switch (array) { - .stack_offset => |off| { - // lea reg, [rbp] - _ = try self.addInst(.{ - .tag = .lea, - .ops = (Mir.Ops{ - .reg1 = addr_reg.to64(), - .reg2 = .rbp, - }).encode(), - .data = .{ .imm = @bitCast(u32, -off) }, - }); - }, - 
else => return self.fail("TODO implement array_elem_val when array is {}", .{array}), - } + const addr_reg = blk: { + const off = inner: { + switch (array) { + .register => { + const off = @intCast(i32, try self.allocMem( + inst, + @intCast(u32, array_ty.abiSize(self.target.*)), + array_ty.abiAlignment(self.target.*), + )); + try self.genSetStack(array_ty, off, array); + break :inner off; + }, + .stack_offset => |off| { + break :inner off; + }, + else => return self.fail("TODO implement array_elem_val when array is {}", .{array}), + } + }; + const addr_reg = try self.register_manager.allocReg(null); + // lea reg, [rbp] + _ = try self.addInst(.{ + .tag = .lea, + .ops = (Mir.Ops{ + .reg1 = addr_reg.to64(), + .reg2 = .rbp, + }).encode(), + .data = .{ .imm = @bitCast(u32, -off) }, + }); + break :blk addr_reg.to64(); + }; + // TODO we could allocate register here, but need to expect addr register and potentially // offset register. const dst_mcv = try self.allocRegOrMem(inst, false); diff --git a/test/behavior/array.zig b/test/behavior/array.zig index 7828963a1c..b4a4c37d95 100644 --- a/test/behavior/array.zig +++ b/test/behavior/array.zig @@ -8,7 +8,6 @@ const expectEqual = testing.expectEqual; test "array to slice" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const a: u32 align(4) = 3; const b: u32 align(8) = 4; @@ -62,7 +61,7 @@ test "array init with mult" { test "array literal with explicit type" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 or builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; const hex_mult: [4]u16 = .{ 4096, 256, 16, 1 }; @@ -71,7 +70,7 @@ test "array literal with explicit type" { } test "array literal with inferred length" { - if (builtin.zig_backend == .stage2_x86_64 or builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; const hex_mult = [_]u16{ 4096, 256, 16, 1 }; @@ -92,7 +91,7 @@ const some_array = [_]u8{ 0, 1, 2, 3 }; test "array literal with specified size" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 or builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; var array = [2]u8{ 1, 2 }; try expect(array[0] == 1); @@ -101,7 +100,7 @@ test "array literal with specified size" { test "array len field" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 or builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; var arr = [4]u8{ 0, 0, 0, 0 }; var ptr = &arr; @@ -143,7 +142,7 @@ test "array with sentinels" { test "void arrays" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 or builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; var array: [4]void = undefined; array[0] = void{}; @@ -222,7 +221,7 @@ test "implicit cast zero sized array ptr to slice" { test "anonymous list literal syntax" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if 
(builtin.zig_backend == .stage2_x86_64 or builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; const S = struct { fn doTheTest() !void { @@ -282,7 +281,6 @@ test "read/write through global variable array of struct fields initialized via test "implicit cast single-item pointer" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO try testImplicitCastSingleItemPtr(); @@ -303,7 +301,6 @@ fn testArrayByValAtComptime(b: [2]u8) u8 { test "comptime evaluating function that takes array by value" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO const arr = [_]u8{ 1, 2 }; @@ -316,7 +313,6 @@ test "comptime evaluating function that takes array by value" { test "runtime initialize array elem and then implicit cast to slice" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO var two: i32 = 2; @@ -327,7 +323,6 @@ test "runtime initialize array elem and then implicit cast to slice" { test "array literal as argument to function" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO const S = struct { @@ -418,7 +413,6 @@ test "double nested array to const slice cast in array literal" { test "anonymous literal in array" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO const S = struct { @@ -444,7 +438,6 @@ test "anonymous literal in array" { test "access the null element of a null terminated array" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO const S = struct { @@ -462,7 +455,6 @@ test "access the null element of a null terminated array" { test "type deduction for array subscript expression" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO const S = struct { From 97c25fb8d049ebcced9f29241516c51e480fb8a0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 17 Feb 2022 18:10:02 +0100 Subject: [PATCH 4/7] x64: implement array_elem_val 
when array is stored in memory --- src/arch/x86_64/CodeGen.zig | 106 ++++++++++++++++-------------------- test/behavior/array.zig | 8 +-- 2 files changed, 50 insertions(+), 64 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index cc3953e57b..6b27627825 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1604,6 +1604,12 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { .stack_offset => |off| { break :inner off; }, + .memory, + .got_load, + .direct_load, + => { + break :blk try self.loadMemPtrIntoRegister(Type.usize, array); + }, else => return self.fail("TODO implement array_elem_val when array is {}", .{array}), } }; @@ -1845,6 +1851,42 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } +fn loadMemPtrIntoRegister(self: *Self, ptr_ty: Type, ptr: MCValue) InnerError!Register { + switch (ptr) { + .got_load, + .direct_load, + => |sym_index| { + const flags: u2 = switch (ptr) { + .got_load => 0b00, + .direct_load => 0b01, + else => unreachable, + }; + const reg = try self.register_manager.allocReg(null); + _ = try self.addInst(.{ + .tag = .lea_pie, + .ops = (Mir.Ops{ + .reg1 = reg.to64(), + .flags = flags, + }).encode(), + .data = .{ + .load_reloc = .{ + .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index, + .sym_index = sym_index, + }, + }, + }); + return reg.to64(); + }, + .memory => |addr| { + // TODO: in case the address fits in an imm32 we can use [ds:imm32] + // instead of wasting an instruction copying the address to a register + const reg = try self.copyToTmpRegister(ptr_ty, .{ .immediate = addr }); + return reg.to64(); + }, + else => unreachable, + } +} + fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type) InnerError!void { _ = ptr_ty; const abi_size = value_ty.abiSize(self.target.*); @@ -1955,41 +1997,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type value.freezeIfRegister(&self.register_manager); defer value.unfreezeIfRegister(&self.register_manager); - const addr_reg: Register = blk: { - switch (ptr) { - .got_load, - .direct_load, - => |sym_index| { - const flags: u2 = switch (ptr) { - .got_load => 0b00, - .direct_load => 0b01, - else => unreachable, - }; - const addr_reg = try self.register_manager.allocReg(null); - _ = try self.addInst(.{ - .tag = .lea_pie, - .ops = (Mir.Ops{ - .reg1 = addr_reg.to64(), - .flags = flags, - }).encode(), - .data = .{ - .load_reloc = .{ - .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index, - .sym_index = sym_index, - }, - }, - }); - break :blk addr_reg; - }, - .memory => |addr| { - // TODO: in case the address fits in an imm32 we can use [ds:imm32] - // instead of wasting an instruction copying the address to a register - const addr_reg = try self.copyToTmpRegister(ptr_ty, .{ .immediate = addr }); - break :blk addr_reg; - }, - else => unreachable, - } - }; + const addr_reg = try self.loadMemPtrIntoRegister(ptr_ty, ptr); // to get the actual address of the value we want to modify we have to go through the GOT // mov reg, [reg] @@ -3831,33 +3839,11 @@ fn genInlineMemcpy(self: *Self, stack_offset: i32, stack_reg: Register, ty: Type const addr_reg: Register = blk: { switch (val) { - .memory => |addr| { - const reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = addr }); - break :blk reg; - }, + .memory, .direct_load, .got_load, - => |sym_index| { - const flags: u2 = switch (val) { - .got_load => 0b00, - 
.direct_load => 0b01, else => unreachable, }; const addr_reg = (try self.register_manager.allocReg(null)).to64(); _ = try self.addInst(.{ .tag = .lea_pie, .ops = (Mir.Ops{ .reg1 = addr_reg, .flags = flags, }).encode(), .data = .{ .load_reloc = .{ .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index, .sym_index = sym_index, }, }, }); break :blk addr_reg; + => { + break :blk try self.loadMemPtrIntoRegister(Type.usize, val); }, .stack_offset => |off| { const addr_reg = (try self.register_manager.allocReg(null)).to64(); diff --git a/test/behavior/array.zig b/test/behavior/array.zig index b4a4c37d95..ba478fef93 100644 --- a/test/behavior/array.zig +++ b/test/behavior/array.zig @@ -153,7 +153,7 @@ test "void arrays" { test "nested arrays" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 or builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; const array_of_strings = [_][]const u8{ "hello", "this", "is", "my", "thing" }; for (array_of_strings) |s, i| { @@ -525,8 +525,8 @@ test "zero-sized array with recursive type definition" { test "type coercion of anon struct literal to array" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const S = struct { const U = union { @@ -543,8 +543,8 @@ test "type coercion of anon struct literal to array" { try expect(arr1[1] == 56); try expect(arr1[2] == 54); - if (@import("builtin").zig_backend == .stage2_llvm) return error.SkipZigTest; // TODO - if (@import("builtin").zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO var x2: U = .{ .a = 42 }; const t2 = .{ x2, .{ .b = true }, .{ .c = "hello" } }; From 83744b92a1d1a923fb3faf3aca9d0f57b09cb97d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 17 Feb 2022 22:49:01 +0100 Subject: [PATCH 5/7] x64: fix wrong regalloc with inst tracking in airCmp We return compare flags rather than a register; tracking the instruction in a register would wrongly cheat the regalloc into thinking we carry the instruction's result in that register, which we do not.
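To illustrate the hazard (a condensed sketch of the airCmp pattern, not the literal diff below):

    // Before: copyToNewRegister ties `inst` to the fresh register in the
    // register table, yet airCmp ultimately yields MCValue.compare_flags_*,
    // so the table claims a register that never holds the result:
    //     const dst_mcv = try self.copyToNewRegister(inst, ty, lhs);
    // After: copy through an untracked temporary register instead:
    const dst_mcv = MCValue{ .register = try self.copyToTmpRegister(ty, lhs) };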
--- src/arch/x86_64/CodeGen.zig | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 6b27627825..2163cfd0bd 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1724,7 +1724,13 @@ fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ un_op, .none, .none }); } -fn reuseOperand(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, op_index: Liveness.OperandInt, mcv: MCValue) bool { +fn reuseOperand( + self: *Self, + inst: Air.Inst.Index, + operand: Air.Inst.Ref, + op_index: Liveness.OperandInt, + mcv: MCValue, +) bool { if (!self.liveness.operandDies(inst, op_index)) return false; @@ -2267,13 +2273,14 @@ fn genBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: // A potential opportunity for future optimization here would be keeping track // of the fact that the instruction is available both as an immediate // and as a register. + // TODO consolidate with limitImmediateType() function switch (src_mcv) { .immediate => |imm| { if (imm > math.maxInt(u31)) { dst_mcv.freezeIfRegister(&self.register_manager); defer dst_mcv.unfreezeIfRegister(&self.register_manager); - src_mcv = try self.copyToNewRegister(inst, Type.u64, src_mcv); + src_mcv = MCValue{ .register = try self.copyToTmpRegister(Type.usize, src_mcv) }; } }, else => {}, @@ -2907,7 +2914,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { // Either one, but not both, can be a memory operand. // Source operand can be an immediate, 8 bits or 32 bits. const dst_mcv = if (lhs.isImmediate() or (lhs.isMemory() and rhs.isMemory())) - try self.copyToNewRegister(inst, ty, lhs) + MCValue{ .register = try self.copyToTmpRegister(ty, lhs) } else lhs; // This instruction supports only signed 32-bit immediates at most. From d74e9b2d98d00b0e9ae0196c0bb3272b2de2b52e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 17 Feb 2022 23:09:44 +0100 Subject: [PATCH 6/7] x64: rename copyToNewRegister to copyToRegisterWithInstTracking for improved clarity --- src/arch/x86_64/CodeGen.zig | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 2163cfd0bd..7b2adcce10 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -883,7 +883,8 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// Allocates a new register and copies `mcv` into it. /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. -fn copyToNewRegister(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue { +/// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
+fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue { const reg = try self.register_manager.allocReg(reg_owner); try self.genSetReg(ty, reg, mcv); return MCValue{ .register = reg }; @@ -939,7 +940,7 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { operand.freezeIfRegister(&self.register_manager); defer operand.unfreezeIfRegister(&self.register_manager); - break :blk try self.copyToNewRegister(inst, dest_ty, operand); + break :blk try self.copyToRegisterWithInstTracking(inst, dest_ty, operand); }; return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); @@ -970,7 +971,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { break :blk operand.register.to64(); } } - const mcv = try self.copyToNewRegister(inst, src_ty, operand); + const mcv = try self.copyToRegisterWithInstTracking(inst, src_ty, operand); break :blk mcv.register.to64(); }; @@ -1088,7 +1089,7 @@ fn genPtrBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_r if (self.reuseOperand(inst, op_lhs, 0, ptr)) { if (ptr.isMemory() or ptr.isRegister()) break :blk ptr; } - break :blk try self.copyToNewRegister(inst, dst_ty, ptr); + break :blk try self.copyToRegisterWithInstTracking(inst, dst_ty, ptr); }; const offset_mcv = blk: { @@ -1338,7 +1339,7 @@ fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { if (self.reuseOperand(inst, ty_op.operand, 0, operand)) { break :result operand; } - break :result try self.copyToNewRegister(inst, self.air.typeOfIndex(inst), operand); + break :result try self.copyToRegisterWithInstTracking(inst, self.air.typeOfIndex(inst), operand); }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -1666,7 +1667,7 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { self.register_manager.freezeRegs(&.{offset_reg}); defer self.register_manager.unfreezeRegs(&.{offset_reg}); - const dst_mcv = try self.copyToNewRegister(inst, ptr_ty, ptr); + const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr); try self.genBinMathOpMir(.add, ptr_ty, dst_mcv, .{ .register = offset_reg }); break :result dst_mcv; }; @@ -2113,7 +2114,7 @@ fn structFieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, inde self.register_manager.freezeRegs(&.{offset_reg}); defer self.register_manager.unfreezeRegs(&.{offset_reg}); - const dst_mcv = try self.copyToNewRegister(inst, ptr_ty, mcv); + const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, mcv); try self.genBinMathOpMir(.add, ptr_ty, dst_mcv, .{ .register = offset_reg }); break :result dst_mcv; }, @@ -2174,7 +2175,9 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { if (self.reuseOperand(inst, operand, 0, mcv)) { break :blk mcv; } else { - const dst_mcv = try self.copyToNewRegister(inst, Type.usize, .{ .register = reg.to64() }); + const dst_mcv = try self.copyToRegisterWithInstTracking(inst, Type.usize, .{ + .register = reg.to64(), + }); break :blk dst_mcv; } }; @@ -2237,7 +2240,7 @@ fn genBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: // LHS dies; use it as the destination. // Both operands cannot be memory. if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(inst, dst_ty, lhs); + dst_mcv = try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs); src_mcv = rhs; } else { dst_mcv = lhs; @@ -2247,7 +2250,7 @@ fn genBinMathOp(self: *Self, inst: Air.Inst.Index, op_lhs: Air.Inst.Ref, op_rhs: // RHS dies; use it as the destination. 
// Both operands cannot be memory. if (lhs.isMemory() and rhs.isMemory()) { - dst_mcv = try self.copyToNewRegister(inst, dst_ty, rhs); + dst_mcv = try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs); src_mcv = lhs; } else { dst_mcv = rhs; @@ -2258,13 +2261,13 @@ rhs.freezeIfRegister(&self.register_manager); defer rhs.unfreezeIfRegister(&self.register_manager); - dst_mcv = try self.copyToNewRegister(inst, dst_ty, lhs); + dst_mcv = try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs); src_mcv = rhs; } else { lhs.freezeIfRegister(&self.register_manager); defer lhs.unfreezeIfRegister(&self.register_manager); - dst_mcv = try self.copyToNewRegister(inst, dst_ty, rhs); + dst_mcv = try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs); src_mcv = lhs; } } @@ -2837,7 +2840,11 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { .register => |reg| { if (Register.allocIndex(reg) == null) { // Save function return value in a callee saved register - break :result try self.copyToNewRegister(inst, self.air.typeOfIndex(inst), info.return_value); + break :result try self.copyToRegisterWithInstTracking( + inst, + self.air.typeOfIndex(inst), + info.return_value, + ); } }, else => {}, From 0f0bb7e5ea2aa4216dcbec57086d2d5c7a84625e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 18 Feb 2022 13:02:08 +0100 Subject: [PATCH 7/7] x64: ensure 16-byte stack alignment across calls --- src/arch/x86_64/CodeGen.zig | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 7b2adcce10..bf49aedd2d 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2581,16 +2581,10 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { self.arg_index += 1; const mcv = self.args[arg_index]; - const max_stack = loop: for (self.args) |arg| { - switch (arg) { - .stack_offset => |last| break :loop last, - else => {}, - } - } else 0; const payload = try self.addExtra(Mir.ArgDbgInfo{ .air_inst = inst, .arg_index = arg_index, - .max_stack = @intCast(u32, max_stack), + .max_stack = self.max_end_stack, }); _ = try self.addInst(.{ .tag = .arg_dbg_info, @@ -2607,7 +2601,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { break :blk mcv; }, .stack_offset => |off| { - const offset = max_stack - off + 16; + const offset = @intCast(i32, self.max_end_stack) - off + 16; break :blk MCValue{ .stack_offset = -offset }; }, else => return self.fail("TODO implement arg for {}", .{mcv}), @@ -2651,7 +2645,6 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { var info = try self.resolveCallingConventionValues(fn_ty); defer info.deinit(self); - var stack_adjustment: ?u32 = null; for (args) |arg, arg_i| { const mc_arg = info.args[arg_i]; const arg_ty = self.air.typeOf(arg); @@ -2666,9 +2659,6 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { }, .stack_offset => |off| { try self.genSetStackArg(arg_ty, off, arg_mcv); - if (stack_adjustment == null) { - stack_adjustment = @intCast(u32, off); - } }, .ptr_stack_offset => { return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{}); @@ -2689,14 +2679,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { } } - if (stack_adjustment) |off| { + if (info.stack_byte_count > 0) { // Adjust the stack _ = try self.addInst(.{ .tag = .sub, .ops = (Mir.Ops{ .reg1 = .rsp, }).encode(), - .data = .{ .imm = off }, + .data = .{ .imm = info.stack_byte_count }, }); }
@@ -2824,14 +2814,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void { } } else unreachable; - if (stack_adjustment) |off| { + if (info.stack_byte_count > 0) { // Readjust the stack _ = try self.addInst(.{ .tag = .add, .ops = (Mir.Ops{ .reg1 = .rsp, }).encode(), - .data = .{ .imm = off }, + .data = .{ .imm = info.stack_byte_count }, }); } @@ -4847,8 +4837,8 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { } } - result.stack_byte_count = next_stack_offset; result.stack_align = 16; + result.stack_byte_count = mem.alignForwardGeneric(u32, next_stack_offset, result.stack_align); }, else => return self.fail("TODO implement function parameters for {} on x86_64", .{cc}), }
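A quick sanity check of the alignment math introduced in patch 7 (a standalone test sketch, not part of the series; std.mem.alignForwardGeneric is the same std helper the final hunk calls):

    const std = @import("std");

    test "arg area size is rounded up to the 16-byte call alignment" {
        // resolveCallingConventionValues now rounds the raw arg-area byte
        // count up front, so the matching sub/add of rsp in airCall keeps
        // the stack 16-byte aligned across the call.
        try std.testing.expectEqual(@as(u32, 0), std.mem.alignForwardGeneric(u32, 0, 16));
        try std.testing.expectEqual(@as(u32, 16), std.mem.alignForwardGeneric(u32, 9, 16));
        try std.testing.expectEqual(@as(u32, 32), std.mem.alignForwardGeneric(u32, 17, 16));
    }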