diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig
index 6880d6dbf3..be88dc67d8 100644
--- a/src-self-hosted/codegen.zig
+++ b/src-self-hosted/codegen.zig
@@ -328,6 +328,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 self.free_registers |= @as(FreeRegInt, 1) << shift;
             }
 
+            /// Before calling, must ensureCapacity + 1 on branch.registers.
+            /// Returns `null` if all registers are allocated.
+            fn allocReg(self: *Branch, inst: *ir.Inst) ?Register {
+                const free_index = @ctz(FreeRegInt, self.free_registers);
+                if (free_index >= callee_preserved_regs.len) {
+                    return null;
+                }
+                self.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
+                const reg = callee_preserved_regs[free_index];
+                self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst });
+                return reg;
+            }
+
             fn deinit(self: *Branch, gpa: *Allocator) void {
                 self.inst_table.deinit(gpa);
                 self.registers.deinit(gpa);
@@ -502,8 +515,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             entry.value = .dead;
             switch (prev_value) {
                 .register => |reg| {
-                    _ = branch.registers.remove(reg);
-                    branch.markRegFree(reg);
+                    const reg64 = reg.to64();
+                    _ = branch.registers.remove(reg64);
+                    branch.markRegFree(reg64);
                 },
                 else => {}, // TODO process stack allocation death
             }
@@ -582,30 +596,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 self.stack_align = abi_align;
 
             const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
-            // TODO Make sure the type can fit in a register before we try to allocate one.
-            const free_index = @ctz(FreeRegInt, branch.free_registers);
-            if (free_index >= callee_preserved_regs.len) {
-                const stack_offset = try self.allocMem(inst, abi_size, abi_align);
-                return MCValue{ .stack_offset = stack_offset };
+            // Make sure the type can fit in a register before we try to allocate one.
+            const ptr_bits = arch.ptrBitWidth();
+            const ptr_bytes: u64 = @divExact(ptr_bits, 8);
+            if (abi_size <= ptr_bytes) {
+                try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
+                if (branch.allocReg(inst)) |reg| {
+                    return MCValue{ .register = registerAlias(reg, abi_size) };
+                }
             }
-            branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
-            const reg = callee_preserved_regs[free_index];
-            try branch.registers.putNoClobber(self.gpa, reg, .{ .inst = inst });
-            return MCValue{ .register = reg };
+            const stack_offset = try self.allocMem(inst, abi_size, abi_align);
+            return MCValue{ .stack_offset = stack_offset };
         }
 
         /// Does not "move" the instruction.
         fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue {
             const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
             try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
-            try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1);
-            const free_index = @ctz(FreeRegInt, branch.free_registers);
-            if (free_index >= callee_preserved_regs.len)
+            const reg = branch.allocReg(inst) orelse
                 return self.fail(inst.src, "TODO implement spilling register to stack", .{});
-            branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
-            const reg = callee_preserved_regs[free_index];
-            branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst });
 
             const old_mcv = branch.inst_table.get(inst).?;
             const new_mcv: MCValue = .{ .register = reg };
             try self.genSetReg(inst.src, reg, old_mcv);
@@ -1131,7 +1141,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                             // test reg, 1
                             // TODO detect al, ax, eax
                             try self.code.ensureCapacity(self.code.items.len + 4);
-                            self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 });
+                            // TODO audit this codegen: we force w = true here to make
+                            // the value affect the big register
+                            self.rex(.{ .b = reg.isExtended(), .w = true });
                             self.code.appendSliceAssumeCapacity(&[_]u8{
                                 0xf6,
                                 @as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()),
@@ -1319,7 +1331,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         if (!self.wantSafety())
                             return; // The already existing value will do just fine.
                         // TODO Upgrade this to a memset call when we have that available.
-                        return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa });
+                        switch (ty.abiSize(self.target.*)) {
+                            1 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaa }),
+                            2 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaa }),
+                            4 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
+                            8 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
+                            else => return self.fail(src, "TODO implement memset", .{}),
+                        }
                     },
                     .compare_flags_unsigned => |op| {
                         return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{});
@@ -1328,24 +1346,35 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{});
                     },
                     .immediate => |x_big| {
-                        if (ty.abiSize(self.target.*) != 4) {
-                            // TODO after fixing this, need to update the undef case above
-                            return self.fail(src, "TODO implement set non 4 abi size stack variable with immediate", .{});
+                        if (stack_offset > 128) {
+                            return self.fail(src, "TODO implement set stack variable with large stack offset", .{});
                         }
-                        try self.code.ensureCapacity(self.code.items.len + 7);
-                        if (x_big <= math.maxInt(u32)) {
-                            const x = @intCast(u32, x_big);
-                            if (stack_offset > 128) {
-                                return self.fail(src, "TODO implement set stack variable with large stack offset", .{});
-                            }
-                            // We have a positive stack offset value but we want a twos complement negative
-                            // offset from rbp, which is at the top of the stack frame.
-                            const negative_offset = @intCast(i8, -@intCast(i32, stack_offset));
-                            const twos_comp = @bitCast(u8, negative_offset);
-                            // mov DWORD PTR [rbp+offset], immediate
-                            self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp });
-                            mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
-                        } else {
+                        try self.code.ensureCapacity(self.code.items.len + 8);
+                        switch (ty.abiSize(self.target.*)) {
+                            1 => {
+                                return self.fail(src, "TODO implement set abi_size=1 stack variable with immediate", .{});
+                            },
+                            2 => {
+                                return self.fail(src, "TODO implement set abi_size=2 stack variable with immediate", .{});
+                            },
+                            4 => {
+                                const x = @intCast(u32, x_big);
+                                // We have a positive stack offset value but we want a twos complement negative
+                                // offset from rbp, which is at the top of the stack frame.
+                                const negative_offset = @intCast(i8, -@intCast(i32, stack_offset));
+                                const twos_comp = @bitCast(u8, negative_offset);
+                                // mov DWORD PTR [rbp+offset], immediate
+                                self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp });
+                                mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
+                            },
+                            8 => {
+                                return self.fail(src, "TODO implement set abi_size=8 stack variable with immediate", .{});
+                            },
+                            else => {
+                                return self.fail(src, "TODO implement set abi_size=large stack variable with immediate", .{});
+                            },
+                        }
+                        if (x_big <= math.maxInt(u32)) {} else {
                             return self.fail(src, "TODO implement set stack variable with large immediate", .{});
                         }
                     },
@@ -1407,7 +1436,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     },
                     .compare_flags_unsigned => |op| {
                         try self.code.ensureCapacity(self.code.items.len + 3);
-                        self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 });
+                        // TODO audit this codegen: we force w = true here to make
+                        // the value affect the big register
+                        self.rex(.{ .b = reg.isExtended(), .w = true });
                         const opcode: u8 = switch (op) {
                             .gte => 0x93,
                             .gt => 0x97,
@@ -1423,9 +1454,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         return self.fail(src, "TODO set register with compare flags value (signed)", .{});
                     },
                     .immediate => |x| {
-                        if (reg.size() != 64) {
-                            return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
-                        }
                         // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
                         // register is the fastest way to zero a register.
                         if (x == 0) {
@@ -1478,16 +1506,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         //
                         // In this case, the encoding of the REX byte is 0b0100100B
                         try self.code.ensureCapacity(self.code.items.len + 10);
-                        self.rex(.{ .w = true, .b = reg.isExtended() });
+                        self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
                         self.code.items.len += 9;
                         self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111);
                         const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
                         mem.writeIntLittle(u64, imm_ptr, x);
                     },
                     .embedded_in_code => |code_offset| {
-                        if (reg.size() != 64) {
-                            return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
-                        }
                         // We need the offset from RIP in a signed i32 twos complement.
                         // The instruction is 7 bytes long and RIP points to the next instruction.
                         try self.code.ensureCapacity(self.code.items.len + 7);
@@ -1495,7 +1520,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         // but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three
                         // bits as five.
                         // REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id.
-                        self.rex(.{ .w = true, .b = reg.isExtended() });
+                        self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
                         self.code.items.len += 6;
                         const rip = self.code.items.len;
                         const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
@@ -1507,12 +1532,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     },
                     .register => |src_reg| {
                         // If the registers are the same, nothing to do.
-                        if (src_reg == reg)
+                        if (src_reg.id() == reg.id())
                             return;
 
-                        if (reg.size() != 64) {
-                            return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
-                        }
                         // This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX.
                         // This is thus three bytes: REX 0x8B R/M.
                         // If the destination is extended, the R field must be 1.
@@ -1520,14 +1542,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle
                         // three bits) contain the destination, and the R/M field (the lower three bits) contain the source.
                         try self.code.ensureCapacity(self.code.items.len + 3);
-                        self.rex(.{ .w = true, .r = reg.isExtended(), .b = src_reg.isExtended() });
+                        self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended(), .b = src_reg.isExtended() });
                         const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111);
                         self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R });
                     },
                     .memory => |x| {
-                        if (reg.size() != 64) {
-                            return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
-                        }
                         if (x <= math.maxInt(u32)) {
                             // Moving from memory to a register is a variant of `8B /r`.
                             // Since we're using 64-bit moves, we require a REX.
@@ -1537,7 +1556,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                             // 0b00RRR100, where RRR is the lower three bits of the register ID.
                            // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
                             try self.code.ensureCapacity(self.code.items.len + 8);
-                            self.rex(.{ .w = true, .b = reg.isExtended() });
+                            self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
                             self.code.appendSliceAssumeCapacity(&[_]u8{
                                 0x8B,
                                 0x04 | (@as(u8, reg.id() & 0b111) << 3), // R
@@ -1580,18 +1599,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                                 //
                                 // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both*
                                 // register operands need to be marked as extended.
-                                self.rex(.{ .w = true, .b = reg.isExtended(), .r = reg.isExtended() });
+                                self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() });
                                 const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id());
                                 self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM });
                             }
                         }
                     },
                     .stack_offset => |off| {
-                        if (reg.size() != 64) {
-                            return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
-                        }
                         try self.code.ensureCapacity(self.code.items.len + 7);
-                        self.rex(.{ .w = true, .r = reg.isExtended() });
+                        self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() });
                         const reg_id: u8 = @truncate(u3, reg.id());
                         if (off <= 128) {
                             // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f]
@@ -1750,11 +1766,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         for (param_types) |ty, i| {
                             switch (ty.zigTypeTag()) {
                                 .Bool, .Int => {
+                                    const param_size = @intCast(u32, ty.abiSize(self.target.*));
                                     if (next_int_reg >= c_abi_int_param_regs.len) {
                                         result.args[i] = .{ .stack_offset = next_stack_offset };
-                                        next_stack_offset += @intCast(u32, ty.abiSize(self.target.*));
+                                        next_stack_offset += param_size;
                                     } else {
-                                        result.args[i] = .{ .register = c_abi_int_param_regs[next_int_reg] };
+                                        const aliased_reg = registerAlias(
+                                            c_abi_int_param_regs[next_int_reg],
+                                            param_size,
+                                        );
+                                        result.args[i] = .{ .register = aliased_reg };
                                         next_int_reg += 1;
                                     }
                                 },
@@ -1778,7 +1799,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 .x86_64 => switch (cc) {
                     .Naked => unreachable,
                     .Unspecified, .C => {
-                        result.return_value = .{ .register = c_abi_int_return_regs[0] };
+                        const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*));
+                        const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size);
+                        result.return_value = .{ .register = aliased_reg };
                     },
                     else => return self.fail(src, "TODO implement function return values for {}", .{cc}),
                 },
@@ -1825,5 +1848,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
         fn parseRegName(name: []const u8) ?Register {
             return std.meta.stringToEnum(Register, name);
         }
+
+        fn registerAlias(reg: Register, size_bytes: u32) Register {
+            switch (arch) {
+                // For x86_64 we have to pick a smaller register alias depending on abi size.
+                .x86_64 => switch (size_bytes) {
+                    1 => return reg.to8(),
+                    2 => return reg.to16(),
+                    4 => return reg.to32(),
+                    8 => return reg.to64(),
+                    else => unreachable,
+                },
+                else => return reg,
+            }
+        }
     };
 }
diff --git a/src-self-hosted/codegen/x86_64.zig b/src-self-hosted/codegen/x86_64.zig
index f6bad45809..c149613ae9 100644
--- a/src-self-hosted/codegen/x86_64.zig
+++ b/src-self-hosted/codegen/x86_64.zig
@@ -81,6 +81,26 @@ pub const Register = enum(u8) {
             else => null,
         };
     }
+
+    /// Convert from any register to its 64 bit alias.
+    pub fn to64(self: Register) Register {
+        return @intToEnum(Register, self.id());
+    }
+
+    /// Convert from any register to its 32 bit alias.
+    pub fn to32(self: Register) Register {
+        return @intToEnum(Register, @as(u8, self.id()) + 16);
+    }
+
+    /// Convert from any register to its 16 bit alias.
+    pub fn to16(self: Register) Register {
+        return @intToEnum(Register, @as(u8, self.id()) + 32);
+    }
+
+    /// Convert from any register to its 8 bit alias.
+    pub fn to8(self: Register) Register {
+        return @intToEnum(Register, @as(u8, self.id()) + 48);
+    }
 };
 
 // zig fmt: on
diff --git a/test/stage2/compare_output.zig b/test/stage2/compare_output.zig
index 2e7c6317b6..bf6a01f483 100644
--- a/test/stage2/compare_output.zig
+++ b/test/stage2/compare_output.zig
@@ -363,5 +363,39 @@ pub fn addCases(ctx: *TestContext) !void {
         ,
             "",
         );
+
+        // Local mutable variables.
+        case.addCompareOutput(
+            \\export fn _start() noreturn {
+            \\    assert(add(3, 4) == 7);
+            \\    assert(add(20, 10) == 30);
+            \\
+            \\    exit();
+            \\}
+            \\
+            \\fn add(a: u32, b: u32) u32 {
+            \\    var x: u32 = undefined;
+            \\    x = 0;
+            \\    x += a;
+            \\    x += b;
+            \\    return x;
+            \\}
+            \\
+            \\pub fn assert(ok: bool) void {
+            \\    if (!ok) unreachable; // assertion failure
+            \\}
+            \\
+            \\fn exit() noreturn {
+            \\    asm volatile ("syscall"
+            \\        :
+            \\        : [number] "{rax}" (231),
+            \\          [arg1] "{rdi}" (0)
+            \\        : "rcx", "r11", "memory"
+            \\    );
+            \\    unreachable;
+            \\}
+        ,
+            "",
+        );
     }
 }
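
Note on the register aliasing scheme: the new to64/to32/to16/to8 helpers and registerAlias only work because of how the Register enum is numbered. The sketch below is an illustration, not code from the compiler; it assumes, as the +16/+32/+48 offsets above imply, that the 64-, 32-, 16- and 8-bit names occupy consecutive blocks of 16 enum values and that id() is the hardware register number (rax = 0, rcx = 1, ..., r15 = 15) for every alias.

const std = @import("std");

// Hypothetical helper (not part of the compiler): computes the enum value of a
// register alias from a register id and an ABI size, using the assumed block
// layout 0-15 = 64-bit, 16-31 = 32-bit, 32-47 = 16-bit, 48-63 = 8-bit.
fn aliasIndex(id: u4, size_bytes: u32) u8 {
    const block: u8 = switch (size_bytes) {
        8 => 0, // rax..r15
        4 => 16, // eax..r15d
        2 => 32, // ax..r15w
        1 => 48, // al..r15b
        else => unreachable,
    };
    return block + @as(u8, id);
}

test "register alias arithmetic" {
    // rcx has id 1, so its 32-bit alias ecx sits at enum value 16 + 1 = 17.
    std.debug.assert(aliasIndex(1, 4) == 17);
    // r8 has id 8, so its 8-bit alias r8b sits at enum value 48 + 8 = 56.
    std.debug.assert(aliasIndex(8, 1) == 56);
}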
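Note on the REX prefix changes: the repeated edits from .w = true to .w = reg.size() == 64 only touch the W bit of the REX prefix, which the x86-64 encoding fixes as 0b0100WRXB. The sketch below uses a hypothetical rexByte helper rather than the compiler's rex() method; it shows which bit each flag controls and why only W is tied to operand size.

const std = @import("std");

// Hypothetical stand-in for a rex() helper: builds the REX prefix byte
// 0b0100WRXB. W = 1 selects 64-bit operand size, R extends ModRM.reg,
// X extends the SIB index, and B extends ModRM.rm or the opcode register.
fn rexByte(flags: struct { w: bool = false, r: bool = false, x: bool = false, b: bool = false }) u8 {
    var byte: u8 = 0x40;
    if (flags.w) byte |= 0b1000;
    if (flags.r) byte |= 0b0100;
    if (flags.x) byte |= 0b0010;
    if (flags.b) byte |= 0b0001;
    return byte;
}

test "rex prefix bits" {
    // REX.W alone is 0x48, as in the example above: 48 8b 4d 7f  mov rcx,QWORD PTR [rbp+0x7f].
    std.debug.assert(rexByte(.{ .w = true }) == 0x48);
    // The 0b0100100B pattern from the mov-imm64 comment: W set, B taken from the register.
    std.debug.assert(rexByte(.{ .w = true, .b = true }) == 0x49);
}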