Merge pull request #8709 from joachimschmidt557/stage2-arm

stage2 ARM: Generate correct code when spilling registers
Jakub Konka 2021-05-09 14:16:40 +02:00 committed by GitHub
commit 15d6efecfb
3 changed files with 281 additions and 85 deletions

@@ -928,7 +928,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// TODO separate architectures with registers from
// stack-based architectures (spu_2)
if (callee_preserved_regs.len > 0) {
if (self.register_manager.tryAllocReg(inst)) |reg| {
if (self.register_manager.tryAllocReg(inst, &.{})) |reg| {
return MCValue{ .register = registerAlias(reg, abi_size) };
}
}
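
[Note, not part of the commit: this first hunk shows the shape of the whole change. Every RegisterManager entry point now takes an `exceptions` slice naming registers that must not be handed out or spilled; call sites with no live registers to protect pass the empty literal `&.{}`. A minimal, self-contained model of that contract, where MockRegister and pickFreeReg are hypothetical stand-ins:

const std = @import("std");

const MockRegister = enum { r2, r3 };

// Return the first free register not listed in `exceptions`,
// or null if every candidate is excluded.
fn pickFreeReg(free: []const MockRegister, exceptions: []const MockRegister) ?MockRegister {
    for (free) |reg| {
        if (std.mem.indexOfScalar(MockRegister, exceptions, reg) != null) continue;
        return reg;
    }
    return null;
}

test "an excepted register is never returned" {
    const free = [_]MockRegister{ .r2, .r3 };
    try std.testing.expectEqual(@as(?MockRegister, .r2), pickFreeReg(&free, &.{}));
    try std.testing.expectEqual(@as(?MockRegister, .r3), pickFreeReg(&free, &.{.r2}));
    try std.testing.expectEqual(@as(?MockRegister, null), pickFreeReg(&free, &.{ .r2, .r3 }));
}
]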
@@ -940,6 +940,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
pub fn spillInstruction(self: *Self, src: LazySrcLoc, reg: Register, inst: *ir.Inst) !void {
const stack_mcv = try self.allocRegOrMem(inst, false);
log.debug("spilling {*} to stack mcv {any}", .{ inst, stack_mcv });
const reg_mcv = self.getResolvedInstValue(inst);
assert(reg == toCanonicalReg(reg_mcv.register));
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
@@ -951,7 +952,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
/// allocated. A second call to `copyToTmpRegister` may return the same register.
/// This can have a side effect of spilling instructions to the stack to free up a register.
fn copyToTmpRegister(self: *Self, src: LazySrcLoc, ty: Type, mcv: MCValue) !Register {
const reg = try self.register_manager.allocRegWithoutTracking();
const reg = try self.register_manager.allocRegWithoutTracking(&.{});
try self.genSetReg(src, ty, reg, mcv);
return reg;
}
@@ -960,7 +961,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
/// `reg_owner` is the instruction that gets associated with the register in the register table.
/// This can have a side effect of spilling instructions to the stack to free up a register.
fn copyToNewRegister(self: *Self, reg_owner: *ir.Inst, mcv: MCValue) !MCValue {
const reg = try self.register_manager.allocReg(reg_owner);
const reg = try self.register_manager.allocReg(reg_owner, &.{});
try self.genSetReg(reg_owner.src, reg_owner.ty, reg, mcv);
return MCValue{ .register = reg };
}
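
[Note, not part of the commit: both copy helpers thread the new `exceptions` parameter through, empty at these call sites. What separates them is tracking: copyToTmpRegister's register stays unassociated, so a later allocation may hand the same register out again, while copyToNewRegister records reg_owner in the register table. A toy model of that distinction, with a hypothetical MockManager:

const std = @import("std");

const MockRegister = enum(u1) { r2, r3 };

const MockManager = struct {
    // Instruction ids currently owning each register, null when free.
    owners: [2]?u32 = .{ null, null },

    fn allocTracked(self: *MockManager, owner: u32) ?MockRegister {
        for (self.owners) |slot, i| {
            if (slot == null) {
                self.owners[i] = owner; // recorded: later calls skip it
                return @intToEnum(MockRegister, @intCast(u1, i));
            }
        }
        return null;
    }

    fn allocUntracked(self: *const MockManager) ?MockRegister {
        for (self.owners) |slot, i| {
            if (slot == null) return @intToEnum(MockRegister, @intCast(u1, i));
        }
        return null;
    }
};

test "untracked allocation may return the same register twice" {
    var m = MockManager{};
    try std.testing.expectEqual(@as(?MockRegister, .r2), m.allocUntracked());
    try std.testing.expectEqual(@as(?MockRegister, .r2), m.allocUntracked());
    try std.testing.expectEqual(@as(?MockRegister, .r2), m.allocTracked(1));
    try std.testing.expectEqual(@as(?MockRegister, .r3), m.allocTracked(2));
}
]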
@@ -1380,54 +1381,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
}
fn genArmBinOp(self: *Self, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, op: ir.Inst.Tag) !MCValue {
const lhs = try self.resolveInst(op_lhs);
const rhs = try self.resolveInst(op_rhs);
// Destination must be a register
var dst_mcv: MCValue = undefined;
var lhs_mcv: MCValue = undefined;
var rhs_mcv: MCValue = undefined;
if (self.reuseOperand(inst, 0, lhs)) {
// LHS is the destination
// RHS is the source
lhs_mcv = if (lhs != .register) try self.copyToNewRegister(inst, lhs) else lhs;
rhs_mcv = rhs;
dst_mcv = lhs_mcv;
} else if (self.reuseOperand(inst, 1, rhs)) {
// RHS is the destination
// LHS is the source
lhs_mcv = lhs;
rhs_mcv = if (rhs != .register) try self.copyToNewRegister(inst, rhs) else rhs;
dst_mcv = rhs_mcv;
} else {
// TODO save 1 copy instruction by directly allocating the destination register
// LHS is the destination
// RHS is the source
lhs_mcv = try self.copyToNewRegister(inst, lhs);
rhs_mcv = rhs;
dst_mcv = lhs_mcv;
}
try self.genArmBinOpCode(inst.src, dst_mcv.register, lhs_mcv, rhs_mcv, op);
return dst_mcv;
}
fn genArmBinOpCode(
self: *Self,
src: LazySrcLoc,
dst_reg: Register,
lhs_mcv: MCValue,
rhs_mcv: MCValue,
op: ir.Inst.Tag,
) !void {
assert(lhs_mcv == .register or lhs_mcv == .register);
const swap_lhs_and_rhs = rhs_mcv == .register and lhs_mcv != .register;
const op1 = if (swap_lhs_and_rhs) rhs_mcv.register else lhs_mcv.register;
const op2 = if (swap_lhs_and_rhs) lhs_mcv else rhs_mcv;
const operand = switch (op2) {
fn armOperandShouldBeRegister(self: *Self, src: LazySrcLoc, mcv: MCValue) !bool {
return switch (mcv) {
.none => unreachable,
.undef => unreachable,
.dead, .unreach => unreachable,
@@ -1439,15 +1394,142 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (imm > std.math.maxInt(u32)) return self.fail(src, "TODO ARM binary arithmetic immediate larger than u32", .{});
// Load immediate into register if it doesn't fit
// as an operand
break :blk Instruction.Operand.fromU32(@intCast(u32, imm)) orelse
Instruction.Operand.reg(try self.copyToTmpRegister(src, Type.initTag(.u32), op2), Instruction.Operand.Shift.none);
// in an operand
break :blk Instruction.Operand.fromU32(@intCast(u32, imm)) == null;
},
.register => true,
.stack_offset,
.embedded_in_code,
.memory,
=> true,
};
}
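
[Background note, not part of the commit: an ARM data-processing instruction's second operand ("operand2") can hold an immediate only when it is an 8-bit value rotated right by an even amount, which is what Instruction.Operand.fromU32 tests for by returning null on failure. A standalone sketch of that encodability check, as a hypothetical helper rather than this codebase's encoder:

const std = @import("std");

// An immediate fits in operand2 when some even right-rotation of an
// 8-bit value produces it; equivalently, rotating the immediate left
// by that amount yields a value <= 0xff.
fn encodableAsOperand2(imm: u32) bool {
    var rot: u32 = 0;
    while (rot < 16) : (rot += 1) {
        if (std.math.rotl(u32, imm, rot * 2) <= 0xff) return true;
    }
    return false;
}

test "operand2 immediate encodability" {
    try std.testing.expect(encodableAsOperand2(255)); // fits as-is
    try std.testing.expect(encodableAsOperand2(0xff00)); // 0xff ror 24
    try std.testing.expect(!encodableAsOperand2(0x101)); // needs 9 bits
}
]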
fn genArmBinOp(self: *Self, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, op: ir.Inst.Tag) !MCValue {
const lhs = try self.resolveInst(op_lhs);
const rhs = try self.resolveInst(op_rhs);
const lhs_is_register = lhs == .register;
const rhs_is_register = rhs == .register;
const lhs_should_be_register = try self.armOperandShouldBeRegister(op_lhs.src, lhs);
const rhs_should_be_register = try self.armOperandShouldBeRegister(op_rhs.src, rhs);
const reuse_lhs = lhs_is_register and self.reuseOperand(inst, 0, lhs);
const reuse_rhs = !reuse_lhs and rhs_is_register and self.reuseOperand(inst, 1, rhs);
// Destination must be a register
var dst_mcv: MCValue = undefined;
var lhs_mcv = lhs;
var rhs_mcv = rhs;
var swap_lhs_and_rhs = false;
// Allocate registers for operands and/or destination
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
if (reuse_lhs) {
// Allocate 0 or 1 registers
if (!rhs_is_register and rhs_should_be_register) {
rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(op_rhs, &.{lhs.register}) };
branch.inst_table.putAssumeCapacity(op_rhs, rhs_mcv);
}
dst_mcv = lhs;
} else if (reuse_rhs) {
// Allocate 0 or 1 registers
if (!lhs_is_register and lhs_should_be_register) {
lhs_mcv = MCValue{ .register = try self.register_manager.allocReg(op_lhs, &.{rhs.register}) };
branch.inst_table.putAssumeCapacity(op_lhs, lhs_mcv);
}
dst_mcv = rhs;
swap_lhs_and_rhs = true;
} else {
// Allocate 1 or 2 registers
if (lhs_should_be_register and rhs_should_be_register) {
if (lhs_is_register and rhs_is_register) {
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{ lhs.register, rhs.register }) };
} else if (lhs_is_register) {
// Move RHS to register
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{lhs.register}) };
rhs_mcv = dst_mcv;
} else if (rhs_is_register) {
// Move LHS to register
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{rhs.register}) };
lhs_mcv = dst_mcv;
} else {
// Move LHS and RHS to register
const regs = try self.register_manager.allocRegs(2, .{ inst, op_rhs }, &.{});
lhs_mcv = MCValue{ .register = regs[0] };
rhs_mcv = MCValue{ .register = regs[1] };
dst_mcv = lhs_mcv;
branch.inst_table.putAssumeCapacity(op_rhs, rhs_mcv);
}
} else if (lhs_should_be_register) {
// RHS is immediate
if (lhs_is_register) {
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{lhs.register}) };
} else {
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{}) };
lhs_mcv = dst_mcv;
}
} else if (rhs_should_be_register) {
// LHS is immediate
if (rhs_is_register) {
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{rhs.register}) };
} else {
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{}) };
rhs_mcv = dst_mcv;
}
swap_lhs_and_rhs = true;
} else unreachable; // binary operation on two immediates
}
// Move the operands to the newly allocated registers
if (lhs_mcv == .register and !lhs_is_register) {
try self.genSetReg(op_lhs.src, op_lhs.ty, lhs_mcv.register, lhs);
}
if (rhs_mcv == .register and !rhs_is_register) {
try self.genSetReg(op_rhs.src, op_rhs.ty, rhs_mcv.register, rhs);
}
try self.genArmBinOpCode(
inst.src,
dst_mcv.register,
lhs_mcv,
rhs_mcv,
swap_lhs_and_rhs,
op,
);
return dst_mcv;
}
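
[Note, not part of the commit: the rewritten genArmBinOp above enumerates reuse and allocation cases so that at most two fresh registers are ever requested, and every register already holding a live operand is passed along as an exception. A compact model of the register budget when both operands need registers, immediate cases elided and names hypothetical:

const std = @import("std");

fn freshRegsNeeded(reuse: bool, lhs_is_reg: bool, rhs_is_reg: bool) u2 {
    if (reuse) {
        // A dying operand's register doubles as the destination; at
        // most the other operand still needs a home.
        return if (lhs_is_reg and rhs_is_reg) 0 else 1;
    }
    // A fresh destination is always allocated, and it can double as
    // the register for one operand that is not yet in a register.
    return if (!lhs_is_reg and !rhs_is_reg) 2 else 1;
}

test "0, 1, or 2 allocations per binary op" {
    try std.testing.expectEqual(@as(u2, 0), freshRegsNeeded(true, true, true));
    try std.testing.expectEqual(@as(u2, 1), freshRegsNeeded(true, true, false));
    try std.testing.expectEqual(@as(u2, 1), freshRegsNeeded(false, true, true));
    try std.testing.expectEqual(@as(u2, 2), freshRegsNeeded(false, false, false));
}
]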
fn genArmBinOpCode(
self: *Self,
src: LazySrcLoc,
dst_reg: Register,
lhs_mcv: MCValue,
rhs_mcv: MCValue,
swap_lhs_and_rhs: bool,
op: ir.Inst.Tag,
) !void {
assert(lhs_mcv == .register or rhs_mcv == .register);
const op1 = if (swap_lhs_and_rhs) rhs_mcv.register else lhs_mcv.register;
const op2 = if (swap_lhs_and_rhs) lhs_mcv else rhs_mcv;
const operand = switch (op2) {
.none => unreachable,
.undef => unreachable,
.dead, .unreach => unreachable,
.compare_flags_unsigned => unreachable,
.compare_flags_signed => unreachable,
.ptr_stack_offset => unreachable,
.ptr_embedded_in_code => unreachable,
.immediate => |imm| Instruction.Operand.fromU32(@intCast(u32, imm)).?,
.register => |reg| Instruction.Operand.reg(reg, Instruction.Operand.Shift.none),
.stack_offset,
.embedded_in_code,
.memory,
=> Instruction.Operand.reg(try self.copyToTmpRegister(src, Type.initTag(.u32), op2), Instruction.Operand.Shift.none),
=> unreachable,
};
switch (op) {
@@ -2613,10 +2695,42 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
const lhs = try self.resolveInst(inst.lhs);
const rhs = try self.resolveInst(inst.rhs);
const src_mcv = rhs;
const dst_mcv = if (lhs != .register) try self.copyToNewRegister(inst.lhs, lhs) else lhs;
const lhs_is_register = lhs == .register;
const rhs_is_register = rhs == .register;
// lhs should always be a register
const rhs_should_be_register = try self.armOperandShouldBeRegister(inst.rhs.src, rhs);
var lhs_mcv = lhs;
var rhs_mcv = rhs;
// Allocate registers
if (rhs_should_be_register) {
if (!lhs_is_register and !rhs_is_register) {
const regs = try self.register_manager.allocRegs(2, .{ inst.rhs, inst.lhs }, &.{});
lhs_mcv = MCValue{ .register = regs[0] };
rhs_mcv = MCValue{ .register = regs[1] };
} else if (!rhs_is_register) {
rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(inst.rhs, &.{}) };
}
}
if (!lhs_is_register) {
lhs_mcv = MCValue{ .register = try self.register_manager.allocReg(inst.lhs, &.{}) };
}
// Move the operands to the newly allocated registers
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
if (lhs_mcv == .register and !lhs_is_register) {
try self.genSetReg(inst.lhs.src, inst.lhs.ty, lhs_mcv.register, lhs);
branch.inst_table.putAssumeCapacity(inst.lhs, lhs);
}
if (rhs_mcv == .register and !rhs_is_register) {
try self.genSetReg(inst.rhs.src, inst.rhs.ty, rhs_mcv.register, rhs);
branch.inst_table.putAssumeCapacity(inst.rhs, rhs);
}
// The destination register is not present in the cmp instruction
try self.genArmBinOpCode(inst.base.src, undefined, lhs_mcv, rhs_mcv, false, .cmp_eq);
try self.genArmBinOpCode(inst.base.src, dst_mcv.register, dst_mcv, src_mcv, .cmp_eq);
const info = inst.lhs.ty.intInfo(self.target.*);
return switch (info.signedness) {
.signed => MCValue{ .compare_flags_signed = op },
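
[Note on the cmp rewrite above, not part of the commit: ARM's cmp subtracts and updates the NZCV condition flags while discarding the numeric result, which is why the new code stops copying lhs into a destination register, passes undefined as dst_reg, and records the result as a compare_flags_* MCValue. A self-contained model of those semantics, as a hypothetical helper:

const std = @import("std");

const Flags = struct { n: bool, z: bool, c: bool, v: bool };

// cmp a, b computes a - b for its flag effects only.
fn armCmpFlags(a: u32, b: u32) Flags {
    const result = a -% b;
    return .{
        .n = result >> 31 != 0, // negative
        .z = result == 0, // zero
        .c = a >= b, // carry: no borrow occurred
        .v = ((a ^ b) & (a ^ result)) >> 31 != 0, // signed overflow
    };
}

test "cmp produces flags, not a value" {
    try std.testing.expect(armCmpFlags(3, 3).z);
    try std.testing.expect(armCmpFlags(4, 3).c and !armCmpFlags(4, 3).z);
    try std.testing.expect(armCmpFlags(3, 4).n);
}
]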

@@ -1,5 +1,6 @@
const std = @import("std");
const math = std.math;
const mem = std.mem;
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ir = @import("ir.zig");
@@ -66,8 +67,13 @@ pub fn RegisterManager(
}
/// Returns `null` if all registers are allocated.
pub fn tryAllocRegs(self: *Self, comptime count: comptime_int, insts: [count]*ir.Inst) ?[count]Register {
if (self.tryAllocRegsWithoutTracking(count)) |regs| {
pub fn tryAllocRegs(
self: *Self,
comptime count: comptime_int,
insts: [count]*ir.Inst,
exceptions: []Register,
) ?[count]Register {
if (self.tryAllocRegsWithoutTracking(count, exceptions)) |regs| {
for (regs) |reg, i| {
const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null
self.registers[index] = insts[i];
@@ -81,21 +87,30 @@ pub fn RegisterManager(
}
/// Returns `null` if all registers are allocated.
pub fn tryAllocReg(self: *Self, inst: *ir.Inst) ?Register {
return if (tryAllocRegs(self, 1, .{inst})) |regs| regs[0] else null;
pub fn tryAllocReg(self: *Self, inst: *ir.Inst, exceptions: []Register) ?Register {
return if (tryAllocRegs(self, 1, .{inst}, exceptions)) |regs| regs[0] else null;
}
pub fn allocRegs(self: *Self, comptime count: comptime_int, insts: [count]*ir.Inst) ![count]Register {
pub fn allocRegs(
self: *Self,
comptime count: comptime_int,
insts: [count]*ir.Inst,
exceptions: []Register,
) ![count]Register {
comptime assert(count > 0 and count <= callee_preserved_regs.len);
assert(count + exceptions.len <= callee_preserved_regs.len);
return self.tryAllocRegs(count, insts) orelse blk: {
return self.tryAllocRegs(count, insts, exceptions) orelse blk: {
// We'll take over the first count registers. Spill
// the instructions that were previously there to a
// stack allocations.
var regs: [count]Register = undefined;
std.mem.copy(Register, &regs, callee_preserved_regs[0..count]);
var i: usize = 0;
for (callee_preserved_regs) |reg| {
if (i >= count) break;
if (mem.indexOfScalar(Register, exceptions, reg) != null) continue;
regs[i] = reg;
for (regs) |reg, i| {
const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null
if (self.isRegFree(reg)) {
self.markRegUsed(reg);
@@ -104,21 +119,28 @@ pub fn RegisterManager(
try self.getFunction().spillInstruction(spilled_inst.src, reg, spilled_inst);
}
self.registers[index] = insts[i];
i += 1;
}
break :blk regs;
};
}
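
[Note, not part of the commit: the loop above fixes victim selection in allocRegs's spill path. Candidates come from the callee-preserved list, and any register named in exceptions is skipped before a spill is even considered, so a live operand can no longer be evicted. A reduced model, where claimWithSpill and the mocks are hypothetical and the real spill emits code via spillInstruction:

const std = @import("std");

const MockRegister = enum { r2, r3 };
const callee_preserved_regs = [_]MockRegister{ .r2, .r3 };

fn claimWithSpill(
    exceptions: []const MockRegister,
    occupied: []const MockRegister,
    spilled: *std.ArrayList(MockRegister),
) !MockRegister {
    for (callee_preserved_regs) |reg| {
        // Never evict a register the caller still needs.
        if (std.mem.indexOfScalar(MockRegister, exceptions, reg) != null) continue;
        if (std.mem.indexOfScalar(MockRegister, occupied, reg) != null) {
            try spilled.append(reg); // stands in for spillInstruction
        }
        return reg;
    }
    return error.OutOfRegisters;
}

test "exceptions are spared from spilling" {
    var spilled = std.ArrayList(MockRegister).init(std.testing.allocator);
    defer spilled.deinit();
    const occupied = [_]MockRegister{ .r2, .r3 };
    // .r2 holds a live operand, so .r3 is evicted instead.
    try std.testing.expectEqual(MockRegister.r3, try claimWithSpill(&.{.r2}, &occupied, &spilled));
    try std.testing.expectEqualSlices(MockRegister, &[_]MockRegister{.r3}, spilled.items);
}
]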
pub fn allocReg(self: *Self, inst: *ir.Inst) !Register {
return (try self.allocRegs(1, .{inst}))[0];
pub fn allocReg(self: *Self, inst: *ir.Inst, exceptions: []Register) !Register {
return (try self.allocRegs(1, .{inst}, exceptions))[0];
}
/// Does not track the registers.
/// Returns `null` if not enough registers are free.
pub fn tryAllocRegsWithoutTracking(self: *Self, comptime count: comptime_int) ?[count]Register {
pub fn tryAllocRegsWithoutTracking(
self: *Self,
comptime count: comptime_int,
exceptions: []Register,
) ?[count]Register {
comptime if (callee_preserved_regs.len == 0) return null;
comptime assert(count > 0 and count <= callee_preserved_regs.len);
assert(count + exceptions.len <= callee_preserved_regs.len);
const free_registers = @popCount(FreeRegInt, self.free_registers);
if (free_registers < count) return null;
@@ -127,30 +149,35 @@ pub fn RegisterManager(
var i: usize = 0;
for (callee_preserved_regs) |reg| {
if (i >= count) break;
if (mem.indexOfScalar(Register, exceptions, reg) != null) continue;
if (self.isRegFree(reg)) {
regs[i] = reg;
i += 1;
}
}
return regs;
return if (i < count) null else regs;
}
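
[Note, not part of the commit: the `return if (i < count) null else regs;` line above is a subtle but important fix. Once exceptions can be skipped, the gather loop may finish with fewer than count registers, and the old unconditional `return regs;` would have returned an array with undefined trailing entries. In miniature, with a hypothetical helper:

const std = @import("std");

const MockRegister = enum { r2, r3 };
const callee_preserved_regs = [_]MockRegister{ .r2, .r3 };

fn gatherFree(comptime count: comptime_int, exceptions: []const MockRegister) ?[count]MockRegister {
    var regs: [count]MockRegister = undefined;
    var i: usize = 0;
    for (callee_preserved_regs) |reg| {
        if (i >= count) break;
        if (std.mem.indexOfScalar(MockRegister, exceptions, reg) != null) continue;
        regs[i] = reg;
        i += 1;
    }
    // Reject partial results instead of leaking undefined registers.
    return if (i < count) null else regs;
}

test "partial gathers are rejected" {
    try std.testing.expect(gatherFree(2, &.{.r2}) == null);
    const regs = gatherFree(2, &.{}).?;
    try std.testing.expectEqual(MockRegister.r2, regs[0]);
    try std.testing.expectEqual(MockRegister.r3, regs[1]);
}
]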
/// Does not track the register.
/// Returns `null` if all registers are allocated.
pub fn tryAllocRegWithoutTracking(self: *Self) ?Register {
return if (self.tryAllocRegsWithoutTracking(1)) |regs| regs[0] else null;
pub fn tryAllocRegWithoutTracking(self: *Self, exceptions: []Register) ?Register {
return if (self.tryAllocRegsWithoutTracking(1, exceptions)) |regs| regs[0] else null;
}
/// Does not track the registers
pub fn allocRegsWithoutTracking(self: *Self, comptime count: comptime_int) ![count]Register {
return self.tryAllocRegsWithoutTracking(count) orelse blk: {
pub fn allocRegsWithoutTracking(self: *Self, comptime count: comptime_int, exceptions: []Register) ![count]Register {
return self.tryAllocRegsWithoutTracking(count, exceptions) orelse blk: {
// We'll take over the first count registers. Spill
// the instructions that were previously there to a
// stack allocations.
var regs: [count]Register = undefined;
std.mem.copy(Register, &regs, callee_preserved_regs[0..count]);
var i: usize = 0;
for (callee_preserved_regs) |reg| {
if (i >= count) break;
if (mem.indexOfScalar(Register, exceptions, reg) != null) continue;
regs[i] = reg;
for (regs) |reg, i| {
const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null
if (!self.isRegFree(reg)) {
const spilled_inst = self.registers[index].?;
@@ -158,6 +185,8 @@ pub fn RegisterManager(
self.registers[index] = null;
self.markRegFree(reg);
}
i += 1;
}
break :blk regs;
@@ -165,8 +194,8 @@ pub fn RegisterManager(
}
/// Does not track the register.
pub fn allocRegWithoutTracking(self: *Self) !Register {
return (try self.allocRegsWithoutTracking(1))[0];
pub fn allocRegWithoutTracking(self: *Self, exceptions: []Register) !Register {
return (try self.allocRegsWithoutTracking(1, exceptions))[0];
}
/// Allocates the specified register with the specified
@ -270,9 +299,9 @@ test "tryAllocReg: no spilling" {
try std.testing.expect(!function.register_manager.isRegAllocated(.r2));
try std.testing.expect(!function.register_manager.isRegAllocated(.r3));
try std.testing.expectEqual(@as(?MockRegister, .r2), function.register_manager.tryAllocReg(&mock_instruction));
try std.testing.expectEqual(@as(?MockRegister, .r3), function.register_manager.tryAllocReg(&mock_instruction));
try std.testing.expectEqual(@as(?MockRegister, null), function.register_manager.tryAllocReg(&mock_instruction));
try std.testing.expectEqual(@as(?MockRegister, .r2), function.register_manager.tryAllocReg(&mock_instruction, &.{}));
try std.testing.expectEqual(@as(?MockRegister, .r3), function.register_manager.tryAllocReg(&mock_instruction, &.{}));
try std.testing.expectEqual(@as(?MockRegister, null), function.register_manager.tryAllocReg(&mock_instruction, &.{}));
try std.testing.expect(function.register_manager.isRegAllocated(.r2));
try std.testing.expect(function.register_manager.isRegAllocated(.r3));
@@ -301,16 +330,16 @@ test "allocReg: spilling" {
try std.testing.expect(!function.register_manager.isRegAllocated(.r2));
try std.testing.expect(!function.register_manager.isRegAllocated(.r3));
try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction));
try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction));
try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction, &.{}));
try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction, &.{}));
// Spill a register
try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction));
try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction, &.{}));
try std.testing.expectEqualSlices(MockRegister, &[_]MockRegister{.r2}, function.spilled.items);
// No spilling necessary
function.register_manager.freeReg(.r3);
try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction));
try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction, &.{}));
try std.testing.expectEqualSlices(MockRegister, &[_]MockRegister{.r2}, function.spilled.items);
}

@@ -458,4 +458,57 @@ pub fn addCases(ctx: *TestContext) !void {
"",
);
}
{
var case = ctx.exe("spilling registers", linux_arm);
case.addCompareOutput(
\\export fn _start() noreturn {
\\ assert(add(3, 4) == 791);
\\ exit();
\\}
\\
\\fn add(a: u32, b: u32) u32 {
\\ const x: u32 = blk: {
\\ const c = a + b; // 7
\\ const d = a + c; // 10
\\ const e = d + b; // 14
\\ const f = d + e; // 24
\\ const g = e + f; // 38
\\ const h = f + g; // 62
\\ const i = g + h; // 100
\\ const j = i + d; // 110
\\ const k = i + j; // 210
\\ const l = k + c; // 217
\\ const m = l + d; // 227
\\ const n = m + e; // 241
\\ const o = n + f; // 265
\\ const p = o + g; // 303
\\ const q = p + h; // 365
\\ const r = q + i; // 465
\\ const s = r + j; // 575
\\ const t = s + k; // 785
\\ break :blk t;
\\ };
\\ const y = x + a; // 788
\\ const z = y + a; // 791
\\ return z;
\\}
\\
\\fn assert(ok: bool) void {
\\ if (!ok) unreachable;
\\}
\\
\\fn exit() noreturn {
\\ asm volatile ("svc #0"
\\ :
\\ : [number] "{r7}" (1),
\\ [arg1] "{r0}" (0)
\\ : "memory"
\\ );
\\ unreachable;
\\}
,
"",
);
}
}
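
[Note, not part of the commit: the new test case is built to force spilling. In add, nearly every intermediate const is used again later, so far more values are live at once than ARM has callee-preserved registers, and code generation only produces 791 if spills and reloads are correct. The expected value can be checked independently in plain Zig, mirroring the chain above:

const std = @import("std");

test "expected result of add(3, 4)" {
    const a: u32 = 3;
    const b: u32 = 4;
    const c = a + b; // 7
    const d = a + c; // 10
    const e = d + b; // 14
    const f = d + e; // 24
    const g = e + f; // 38
    const h = f + g; // 62
    const i = g + h; // 100
    const j = i + d; // 110
    const k = i + j; // 210
    const l = k + c; // 217
    const m = l + d; // 227
    const n = m + e; // 241
    const o = n + f; // 265
    const p = o + g; // 303
    const q = p + h; // 365
    const r = q + i; // 465
    const s = r + j; // 575
    const t = s + k; // 785
    try std.testing.expectEqual(@as(u32, 791), t + a + a);
}
]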