Merge pull request #6163 from ziglang/stage2-condbr
stage2: codegen for conditional branching
This commit is contained in:
commit 3abf9e1457
@@ -83,6 +83,7 @@ pub fn build(b: *Builder) !void {
}

const log_scopes = b.option([]const []const u8, "log", "Which log scopes to enable") orelse &[0][]const u8{};
const zir_dumps = b.option([]const []const u8, "dump-zir", "Which functions to dump ZIR for before codegen") orelse &[0][]const u8{};

const opt_version_string = b.option([]const u8, "version-string", "Override Zig version string. Default is to find out with git.");
const version = if (opt_version_string) |version| version else v: {
@@ -103,6 +104,7 @@ pub fn build(b: *Builder) !void {
exe.addBuildOption([]const u8, "version", version);

exe.addBuildOption([]const []const u8, "log_scopes", log_scopes);
exe.addBuildOption([]const []const u8, "zir_dumps", zir_dumps);
exe.addBuildOption(bool, "enable_tracy", tracy != null);
if (tracy) |tracy_path| {
const client_cpp = fs.path.join(
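
Side note (not part of the diff): a minimal sketch of how a `[]const []const u8` build option like "dump-zir" above reaches the compiler sources. `addBuildOption` feeds the generated `build_options` package, which the Elf linker code later in this diff reads as `build_options.zir_dumps`. The helper name `shouldDumpZir` is invented for illustration.

const std = @import("std");
const build_options = @import("build_options");

/// Hypothetical helper: returns true if ZIR should be dumped for `fn_name`,
/// mirroring the lookup done in updateDecl later in this diff.
fn shouldDumpZir(fn_name: []const u8) bool {
    for (build_options.zir_dumps) |dump_name| {
        if (std.mem.eql(u8, fn_name, dump_name)) return true;
    }
    return false;
}
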
@@ -13,7 +13,8 @@ const Scope = Module.Scope;
const InnerError = Module.InnerError;

pub const ResultLoc = union(enum) {
/// The expression is the right-hand side of assignment to `_`.
/// The expression is the right-hand side of assignment to `_`. Only the side-effects of the
/// expression should be generated.
discard,
/// The expression has an inferred type, and it will be evaluated as an rvalue.
none,

@@ -273,8 +273,22 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
/// across each runtime branch upon joining.
branch_stack: *std.ArrayList(Branch),

/// The key must be canonical register.
registers: std.AutoHashMapUnmanaged(Register, *ir.Inst) = .{},
free_registers: FreeRegInt = math.maxInt(FreeRegInt),
/// Maps offset to what is stored there.
stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{},

/// Offset from the stack base, representing the end of the stack frame.
max_end_stack: u32 = 0,
/// Represents the current end stack offset. If there is no existing slot
/// to place a new stack allocation, it goes here, and then bumps `max_end_stack`.
next_stack_offset: u32 = 0,

const MCValue = union(enum) {
/// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
/// TODO Look into deleting this tag and using `dead` instead, since every use
/// of MCValue.none should be instead looking at the type and noticing it is 0 bits.
none,
/// Control flow will not allow this value to be observed.
unreach,
@@ -346,71 +360,55 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {

const Branch = struct {
inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{},
/// The key must be canonical register.
registers: std.AutoHashMapUnmanaged(Register, RegisterAllocation) = .{},
free_registers: FreeRegInt = math.maxInt(FreeRegInt),

/// Maps offset to what is stored there.
stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{},
/// Offset from the stack base, representing the end of the stack frame.
max_end_stack: u32 = 0,
/// Represents the current end stack offset. If there is no existing slot
/// to place a new stack allocation, it goes here, and then bumps `max_end_stack`.
next_stack_offset: u32 = 0,

fn markRegUsed(self: *Branch, reg: Register) void {
if (FreeRegInt == u0) return;
const index = reg.allocIndex() orelse return;
const ShiftInt = math.Log2Int(FreeRegInt);
const shift = @intCast(ShiftInt, index);
self.free_registers &= ~(@as(FreeRegInt, 1) << shift);
}

fn markRegFree(self: *Branch, reg: Register) void {
if (FreeRegInt == u0) return;
const index = reg.allocIndex() orelse return;
const ShiftInt = math.Log2Int(FreeRegInt);
const shift = @intCast(ShiftInt, index);
self.free_registers |= @as(FreeRegInt, 1) << shift;
}

/// Before calling, must ensureCapacity + 1 on branch.registers.
/// Returns `null` if all registers are allocated.
fn allocReg(self: *Branch, inst: *ir.Inst) ?Register {
const free_index = @ctz(FreeRegInt, self.free_registers);
if (free_index >= callee_preserved_regs.len) {
return null;
}
self.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
const reg = callee_preserved_regs[free_index];
self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst });
log.debug("alloc {} => {*}", .{reg, inst});
return reg;
}

/// Does not track the register.
fn findUnusedReg(self: *Branch) ?Register {
const free_index = @ctz(FreeRegInt, self.free_registers);
if (free_index >= callee_preserved_regs.len) {
return null;
}
return callee_preserved_regs[free_index];
}

fn deinit(self: *Branch, gpa: *Allocator) void {
self.inst_table.deinit(gpa);
self.registers.deinit(gpa);
self.stack.deinit(gpa);
self.* = undefined;
}
};

const RegisterAllocation = struct {
inst: *ir.Inst,
};
fn markRegUsed(self: *Self, reg: Register) void {
if (FreeRegInt == u0) return;
const index = reg.allocIndex() orelse return;
const ShiftInt = math.Log2Int(FreeRegInt);
const shift = @intCast(ShiftInt, index);
self.free_registers &= ~(@as(FreeRegInt, 1) << shift);
}

fn markRegFree(self: *Self, reg: Register) void {
if (FreeRegInt == u0) return;
const index = reg.allocIndex() orelse return;
const ShiftInt = math.Log2Int(FreeRegInt);
const shift = @intCast(ShiftInt, index);
self.free_registers |= @as(FreeRegInt, 1) << shift;
}

/// Before calling, must ensureCapacity + 1 on self.registers.
/// Returns `null` if all registers are allocated.
fn allocReg(self: *Self, inst: *ir.Inst) ?Register {
const free_index = @ctz(FreeRegInt, self.free_registers);
if (free_index >= callee_preserved_regs.len) {
return null;
}
self.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
const reg = callee_preserved_regs[free_index];
self.registers.putAssumeCapacityNoClobber(reg, inst);
log.debug("alloc {} => {*}", .{reg, inst});
return reg;
}

/// Does not track the register.
fn findUnusedReg(self: *Self) ?Register {
const free_index = @ctz(FreeRegInt, self.free_registers);
if (free_index >= callee_preserved_regs.len) {
return null;
}
return callee_preserved_regs[free_index];
}

const StackAllocation = struct {
inst: *ir.Inst,
/// TODO do we need size? should be determined by inst.ty.abiSize()
size: u32,
};
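
Side note (not part of the diff): the register tracking that moves from `Branch` onto `Self` above is a plain bitmask. Bit i of `free_registers` says whether `callee_preserved_regs[i]` is free, `@ctz` picks the lowest free one, and the shift amount has to be cast down to the mask's `Log2Int` type. A minimal standalone sketch, assuming a hypothetical four-register set and the same Zig version as this commit:

const std = @import("std");
const math = std.math;

const FreeRegInt = u4; // one bit per register in this toy example

pub fn main() void {
    var free_registers: FreeRegInt = math.maxInt(FreeRegInt); // 0b1111: all free

    // Allocate: take the lowest-numbered free register.
    const free_index = @ctz(FreeRegInt, free_registers); // 0
    const ShiftInt = math.Log2Int(FreeRegInt);
    const shift = @intCast(ShiftInt, free_index);
    free_registers &= ~(@as(FreeRegInt, 1) << shift); // mark it used -> 0b1110

    // Free it again.
    free_registers |= @as(FreeRegInt, 1) << shift; // back to 0b1111

    std.debug.print("free mask: 0b{b}\n", .{free_registers});
}
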
@@ -435,8 +433,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
branch_stack.items[0].deinit(bin_file.allocator);
branch_stack.deinit();
}
const branch = try branch_stack.addOne();
branch.* = .{};
try branch_stack.append(.{});

const src_data: struct {lbrace_src: usize, rbrace_src: usize, source: []const u8} = blk: {
if (module_fn.owner_decl.scope.cast(Module.Scope.File)) |scope_file| {
@@ -476,6 +473,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.rbrace_src = src_data.rbrace_src,
.source = src_data.source,
};
defer function.registers.deinit(bin_file.allocator);
defer function.stack.deinit(bin_file.allocator);
defer function.exitlude_jump_relocs.deinit(bin_file.allocator);

var call_info = function.resolveCallingConventionValues(src, fn_type) catch |err| switch (err) {
@@ -487,7 +486,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
function.args = call_info.args;
function.ret_mcv = call_info.return_value;
function.stack_align = call_info.stack_align;
branch.max_end_stack = call_info.stack_byte_count;
function.max_end_stack = call_info.stack_byte_count;

function.gen() catch |err| switch (err) {
error.CodegenFail => return Result{ .fail = function.err_msg.? },
@@ -523,7 +522,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
try self.dbgSetPrologueEnd();
try self.genBody(self.mod_fn.analysis.success);

const stack_end = self.branch_stack.items[0].max_end_stack;
const stack_end = self.max_end_stack;
if (stack_end > math.maxInt(i32))
return self.fail(self.src, "too much stack used in call parameters", .{});
const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
@@ -580,13 +579,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}

fn genBody(self: *Self, body: ir.Body) InnerError!void {
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
const inst_table = &branch.inst_table;
for (body.instructions) |inst| {
try self.ensureProcessDeathCapacity(@popCount(@TypeOf(inst.deaths), inst.deaths));

const mcv = try self.genFuncInst(inst);
log.debug("{*} => {}", .{inst, mcv});
// TODO don't put void or dead things in here
try inst_table.putNoClobber(self.gpa, inst, mcv);
if (!inst.isUnused()) {
log.debug("{*} => {}", .{inst, mcv});
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
try branch.inst_table.putNoClobber(self.gpa, inst, mcv);
}

var i: ir.Inst.DeathsBitIndex = 0;
while (inst.getOperand(i)) |operand| : (i += 1) {
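
Side note (not part of the diff): `inst.deaths` is a bit set with one bit per operand, so the `@popCount` above simply counts how many operands die at this instruction and therefore how many `inst_table` slots might be needed. A tiny sketch; the `u16` width is illustrative, not the real `DeathsInt`:

const std = @import("std");

pub fn main() void {
    const deaths: u16 = 0b101; // operands 0 and 2 die at this instruction
    const dying = @popCount(u16, deaths);
    std.debug.print("{} operands die here\n", .{dying});
}
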
@@ -628,21 +629,28 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
self.dbg_line.appendAssumeCapacity(DW.LNS_copy);
}

/// Asserts there is already capacity to insert into top branch inst_table.
fn processDeath(self: *Self, inst: *ir.Inst) void {
if (inst.tag == .constant) return; // Constants are immortal.
// When editing this function, note that the logic must synchronize with `reuseOperand`.
const prev_value = self.getResolvedInstValue(inst);
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
const entry = branch.inst_table.getEntry(inst) orelse return;
const prev_value = entry.value;
entry.value = .dead;
branch.inst_table.putAssumeCapacity(inst, .dead);
switch (prev_value) {
.register => |reg| {
const canon_reg = toCanonicalReg(reg);
_ = branch.registers.remove(canon_reg);
branch.markRegFree(canon_reg);
_ = self.registers.remove(canon_reg);
self.markRegFree(canon_reg);
},
else => {}, // TODO process stack allocation death
}
}

fn ensureProcessDeathCapacity(self: *Self, additional_count: usize) !void {
const table = &self.branch_stack.items[self.branch_stack.items.len - 1].inst_table;
try table.ensureCapacity(self.gpa, table.items().len + additional_count);
}

/// Adds a Type to the .debug_info at the current position. The bytes will be populated later,
/// after codegen for this symbol is done.
fn addDbgInfoTypeReloc(self: *Self, ty: Type) !void {
@@ -705,13 +713,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
fn allocMem(self: *Self, inst: *ir.Inst, abi_size: u32, abi_align: u32) !u32 {
if (abi_align > self.stack_align)
self.stack_align = abi_align;
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
// TODO find a free slot instead of always appending
const offset = mem.alignForwardGeneric(u32, branch.next_stack_offset, abi_align);
branch.next_stack_offset = offset + abi_size;
if (branch.next_stack_offset > branch.max_end_stack)
branch.max_end_stack = branch.next_stack_offset;
try branch.stack.putNoClobber(self.gpa, offset, .{
const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align);
self.next_stack_offset = offset + abi_size;
if (self.next_stack_offset > self.max_end_stack)
self.max_end_stack = self.next_stack_offset;
try self.stack.putNoClobber(self.gpa, offset, .{
.inst = inst,
.size = abi_size,
});
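
Side note (not part of the diff): `allocMem` above is a bump allocator over the stack frame; the offset is aligned up, the running end of the frame is advanced by the size, and the high-water mark is raised if needed. A minimal sketch of that arithmetic with made-up numbers:

const std = @import("std");
const mem = std.mem;

pub fn main() void {
    var next_stack_offset: u32 = 5; // current end of the frame
    var max_end_stack: u32 = 5; // high-water mark

    const abi_size: u32 = 8;
    const abi_align: u32 = 8;

    const offset = mem.alignForwardGeneric(u32, next_stack_offset, abi_align); // 8
    next_stack_offset = offset + abi_size; // 16
    if (next_stack_offset > max_end_stack)
        max_end_stack = next_stack_offset;

    std.debug.print("offset={} max_end_stack={}\n", .{ offset, max_end_stack });
}
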
@@ -737,15 +744,14 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
const abi_align = elem_ty.abiAlignment(self.target.*);
if (abi_align > self.stack_align)
self.stack_align = abi_align;
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];

if (reg_ok) {
// Make sure the type can fit in a register before we try to allocate one.
const ptr_bits = arch.ptrBitWidth();
const ptr_bytes: u64 = @divExact(ptr_bits, 8);
if (abi_size <= ptr_bytes) {
try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
if (branch.allocReg(inst)) |reg| {
try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1);
if (self.allocReg(inst)) |reg| {
return MCValue{ .register = registerAlias(reg, abi_size) };
}
}
@@ -758,20 +764,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
/// allocated. A second call to `copyToTmpRegister` may return the same register.
/// This can have a side effect of spilling instructions to the stack to free up a register.
fn copyToTmpRegister(self: *Self, src: usize, mcv: MCValue) !Register {
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];

const reg = branch.findUnusedReg() orelse b: {
const reg = self.findUnusedReg() orelse b: {
// We'll take over the first register. Move the instruction that was previously
// there to a stack allocation.
const reg = callee_preserved_regs[0];
const regs_entry = branch.registers.remove(reg).?;
const spilled_inst = regs_entry.value.inst;
const regs_entry = self.registers.remove(reg).?;
const spilled_inst = regs_entry.value;

const stack_mcv = try self.allocRegOrMem(spilled_inst, false);
const inst_entry = branch.inst_table.getEntry(spilled_inst).?;
const reg_mcv = inst_entry.value;
const reg_mcv = self.getResolvedInstValue(spilled_inst);
assert(reg == toCanonicalReg(reg_mcv.register));
inst_entry.value = stack_mcv;
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
try branch.inst_table.put(self.gpa, spilled_inst, stack_mcv);
try self.genSetStack(src, spilled_inst.ty, stack_mcv.stack_offset, reg_mcv);

break :b reg;
@@ -784,22 +788,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
/// `reg_owner` is the instruction that gets associated with the register in the register table.
/// This can have a side effect of spilling instructions to the stack to free up a register.
fn copyToNewRegister(self: *Self, reg_owner: *ir.Inst, mcv: MCValue) !MCValue {
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1);

const reg = branch.allocReg(reg_owner) orelse b: {
const reg = self.allocReg(reg_owner) orelse b: {
// We'll take over the first register. Move the instruction that was previously
// there to a stack allocation.
const reg = callee_preserved_regs[0];
const regs_entry = branch.registers.getEntry(reg).?;
const spilled_inst = regs_entry.value.inst;
regs_entry.value = .{ .inst = reg_owner };
const regs_entry = self.registers.getEntry(reg).?;
const spilled_inst = regs_entry.value;
regs_entry.value = reg_owner;

const stack_mcv = try self.allocRegOrMem(spilled_inst, false);
const inst_entry = branch.inst_table.getEntry(spilled_inst).?;
const reg_mcv = inst_entry.value;
const reg_mcv = self.getResolvedInstValue(spilled_inst);
assert(reg == toCanonicalReg(reg_mcv.register));
inst_entry.value = stack_mcv;
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
try branch.inst_table.put(self.gpa, spilled_inst, stack_mcv);
try self.genSetStack(reg_owner.src, spilled_inst.ty, stack_mcv.stack_offset, reg_mcv);

break :b reg;
@@ -934,9 +937,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.register => |reg| {
// If it's in the registers table, need to associate the register with the
// new instruction.
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
if (branch.registers.getEntry(toCanonicalReg(reg))) |entry| {
entry.value = .{ .inst = inst };
if (self.registers.getEntry(toCanonicalReg(reg))) |entry| {
entry.value = inst;
}
log.debug("reusing {} => {*}", .{reg, inst});
},
@@ -950,6 +952,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// Prevent the operand deaths processing code from deallocating it.
inst.clearOperandDeath(op_index);

// That makes us responsible for doing the rest of the stuff that processDeath would have done.
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
branch.inst_table.putAssumeCapacity(inst.getOperand(op_index).?, .dead);

return true;
}

@@ -1231,8 +1237,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (inst.base.isUnused())
return MCValue.dead;

const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1);

const result = self.args[self.arg_index];
self.arg_index += 1;
@@ -1240,8 +1245,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
const name_with_null = inst.name[0..mem.lenZ(inst.name) + 1];
switch (result) {
.register => |reg| {
branch.registers.putAssumeCapacityNoClobber(toCanonicalReg(reg), .{ .inst = &inst.base });
branch.markRegUsed(reg);
self.registers.putAssumeCapacityNoClobber(toCanonicalReg(reg), &inst.base);
self.markRegUsed(reg);

try self.dbg_info.ensureCapacity(self.dbg_info.items.len + 8 + name_with_null.len);
self.dbg_info.appendAssumeCapacity(link.File.Elf.abbrev_parameter);
@@ -1536,18 +1541,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {

fn genDbgStmt(self: *Self, inst: *ir.Inst.NoOp) !MCValue {
try self.dbgAdvancePCAndLine(inst.base.src);
return MCValue.none;
assert(inst.base.isUnused());
return MCValue.dead;
}

fn genCondBr(self: *Self, inst: *ir.Inst.CondBr) !MCValue {
// TODO Rework this so that the arch-independent logic isn't buried and duplicated.
switch (arch) {
.x86_64 => {
const cond = try self.resolveInst(inst.condition);

const reloc: Reloc = switch (arch) {
.i386, .x86_64 => reloc: {
try self.code.ensureCapacity(self.code.items.len + 6);

const cond = try self.resolveInst(inst.condition);
switch (cond) {
.compare_flags_signed => |cmp_op| {
const opcode: u8 = switch (cond) {
.compare_flags_signed => |cmp_op| blk: {
// Here we map to the opposite opcode because the jump is to the false branch.
const opcode: u8 = switch (cmp_op) {
.gte => 0x8c,
@@ -1557,9 +1563,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.lte => 0x8f,
.eq => 0x85,
};
return self.genX86CondBr(inst, opcode);
break :blk opcode;
},
.compare_flags_unsigned => |cmp_op| {
.compare_flags_unsigned => |cmp_op| blk: {
// Here we map to the opposite opcode because the jump is to the false branch.
const opcode: u8 = switch (cmp_op) {
.gte => 0x82,
@@ -1569,9 +1575,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.lte => 0x87,
.eq => 0x85,
};
return self.genX86CondBr(inst, opcode);
break :blk opcode;
},
.register => |reg| {
.register => |reg| blk: {
// test reg, 1
// TODO detect al, ax, eax
try self.code.ensureCapacity(self.code.items.len + 4);
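
Side note (not part of the diff) on the opcode bytes in the two switches above: together with the 0x0f prefix they are x86 two-byte Jcc rel32 jumps, and the condition is deliberately inverted because the emitted jump skips to the false ("else") branch. For the cases visible in these hunks: signed 0x8c/0x8f/0x85 are jl/jg/jne, unsigned 0x82/0x87/0x85 are jb/ja/jne, and the 0x84 used for a boolean register below is je (taken when `test reg, 1` finds the bit clear). A tiny sketch of the signed mapping:

const std = @import("std");

const CmpOp = enum { gte, lte, eq };

/// Opcode byte placed after the 0x0f prefix: the *inverse* of `op`, because
/// the jump targets the false branch. Only the cases shown in the hunks
/// above are covered here.
fn invertedSignedJcc(op: CmpOp) u8 {
    return switch (op) {
        .gte => 0x8c, // jl: taken when the comparison was NOT >=
        .lte => 0x8f, // jg: taken when the comparison was NOT <=
        .eq => 0x85, // jne: taken when the comparison was NOT ==
    };
}

pub fn main() void {
    std.debug.print("inverse of .eq: 0x{x}\n", .{invertedSignedJcc(.eq)});
}
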
@@ -1583,23 +1589,128 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
@as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()),
0x01,
});
return self.genX86CondBr(inst, 0x84);
break :blk 0x84;
},
else => return self.fail(inst.base.src, "TODO implement condbr {} when condition is {}", .{ self.target.cpu.arch, @tagName(cond) }),
}
};
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode });
const reloc = Reloc{ .rel32 = self.code.items.len };
self.code.items.len += 4;
break :reloc reloc;
},
else => return self.fail(inst.base.src, "TODO implement condbr for {}", .{self.target.cpu.arch}),
}
}
else => return self.fail(inst.base.src, "TODO implement condbr {}", .{ self.target.cpu.arch }),
};

fn genX86CondBr(self: *Self, inst: *ir.Inst.CondBr, opcode: u8) !MCValue {
// TODO deal with liveness / deaths condbr's then_entry_deaths and else_entry_deaths
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode });
const reloc = Reloc{ .rel32 = self.code.items.len };
self.code.items.len += 4;
// Capture the state of register and stack allocation state so that we can revert to it.
const parent_next_stack_offset = self.next_stack_offset;
const parent_free_registers = self.free_registers;
var parent_stack = try self.stack.clone(self.gpa);
defer parent_stack.deinit(self.gpa);
var parent_registers = try self.registers.clone(self.gpa);
defer parent_registers.deinit(self.gpa);

try self.branch_stack.append(.{});

const then_deaths = inst.thenDeaths();
try self.ensureProcessDeathCapacity(then_deaths.len);
for (then_deaths) |operand| {
self.processDeath(operand);
}
try self.genBody(inst.then_body);

// Revert to the previous register and stack allocation state.

var saved_then_branch = self.branch_stack.pop();
defer saved_then_branch.deinit(self.gpa);

self.registers.deinit(self.gpa);
self.registers = parent_registers;
parent_registers = .{};

self.stack.deinit(self.gpa);
self.stack = parent_stack;
parent_stack = .{};

self.next_stack_offset = parent_next_stack_offset;
self.free_registers = parent_free_registers;

try self.performReloc(inst.base.src, reloc);
const else_branch = self.branch_stack.addOneAssumeCapacity();
else_branch.* = .{};

const else_deaths = inst.elseDeaths();
try self.ensureProcessDeathCapacity(else_deaths.len);
for (else_deaths) |operand| {
self.processDeath(operand);
}
try self.genBody(inst.else_body);

// At this point, each branch will possibly have conflicting values for where
// each instruction is stored. They agree, however, on which instructions are alive/dead.
// We use the first ("then") branch as canonical, and here emit
// instructions into the second ("else") branch to make it conform.
// We continue to respect the data structure semantic guarantees of the else_branch so
// that we can use all the code emitting abstractions. This is why at the bottom we
// assert that parent_branch.free_registers equals the saved_then_branch.free_registers
// rather than assigning it.
const parent_branch = &self.branch_stack.items[self.branch_stack.items.len - 2];
try parent_branch.inst_table.ensureCapacity(self.gpa, parent_branch.inst_table.items().len +
else_branch.inst_table.items().len);
for (else_branch.inst_table.items()) |else_entry| {
const canon_mcv = if (saved_then_branch.inst_table.remove(else_entry.key)) |then_entry| blk: {
// The instruction's MCValue is overridden in both branches.
parent_branch.inst_table.putAssumeCapacity(else_entry.key, then_entry.value);
if (else_entry.value == .dead) {
assert(then_entry.value == .dead);
continue;
}
break :blk then_entry.value;
} else blk: {
if (else_entry.value == .dead)
continue;
// The instruction is only overridden in the else branch.
var i: usize = self.branch_stack.items.len - 2;
while (true) {
i -= 1; // If this overflows, the question is: why wasn't the instruction marked dead?
if (self.branch_stack.items[i].inst_table.get(else_entry.key)) |mcv| {
assert(mcv != .dead);
break :blk mcv;
}
}
};
log.debug("consolidating else_entry {*} {}=>{}", .{else_entry.key, else_entry.value, canon_mcv});
// TODO make sure the destination stack offset / register does not already have something
// going on there.
try self.setRegOrMem(inst.base.src, else_entry.key.ty, canon_mcv, else_entry.value);
// TODO track the new register / stack allocation
}
try parent_branch.inst_table.ensureCapacity(self.gpa, parent_branch.inst_table.items().len +
saved_then_branch.inst_table.items().len);
for (saved_then_branch.inst_table.items()) |then_entry| {
// We already deleted the items from this table that matched the else_branch.
// So these are all instructions that are only overridden in the then branch.
parent_branch.inst_table.putAssumeCapacity(then_entry.key, then_entry.value);
if (then_entry.value == .dead)
continue;
const parent_mcv = blk: {
var i: usize = self.branch_stack.items.len - 2;
while (true) {
i -= 1;
if (self.branch_stack.items[i].inst_table.get(then_entry.key)) |mcv| {
assert(mcv != .dead);
break :blk mcv;
}
}
};
log.debug("consolidating then_entry {*} {}=>{}", .{then_entry.key, parent_mcv, then_entry.value});
// TODO make sure the destination stack offset / register does not already have something
// going on there.
try self.setRegOrMem(inst.base.src, then_entry.key.ty, parent_mcv, then_entry.value);
// TODO track the new register / stack allocation
}

self.branch_stack.pop().deinit(self.gpa);

return MCValue.unreach;
}
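
Side note (not part of the diff): the consolidation loops above are the heart of this commit. After both bodies are generated, the same instruction may live in different machine locations in each branch; the then-branch's choice is treated as canonical and moves are emitted into the else branch so the two states agree at the join. A toy sketch of that idea, with `u32` keys/locations and a print standing in for `setRegOrMem`; the real code works on `*ir.Inst` and `MCValue`:

const std = @import("std");

pub fn main() !void {
    const gpa = std.heap.page_allocator;

    var then_locs = std.AutoHashMap(u32, u32).init(gpa); // inst id -> location after "then"
    defer then_locs.deinit();
    var else_locs = std.AutoHashMap(u32, u32).init(gpa); // inst id -> location after "else"
    defer else_locs.deinit();

    try then_locs.put(1, 10); // instruction 1 ended up in "location 10" in the then branch
    try else_locs.put(1, 20); // ...but in "location 20" in the else branch

    var it = else_locs.iterator();
    while (it.next()) |else_entry| {
        if (then_locs.get(else_entry.key)) |canon| {
            if (canon != else_entry.value) {
                // Emit a move in the else branch: its location -> the canonical one.
                std.debug.print("move inst {} from {} to {}\n", .{ else_entry.key, else_entry.value, canon });
            }
        }
    }
}
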
@@ -1673,11 +1784,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
switch (reloc) {
.rel32 => |pos| {
const amt = self.code.items.len - (pos + 4);
// If it wouldn't jump at all, elide it.
if (amt == 0) {
self.code.items.len -= 5;
return;
}
// Here it would be tempting to implement testing for amt == 0 and then elide the
// jump. However, that will cause a problem because other jumps may assume that they
// can jump to this code. Or maybe I didn't understand something when I was debugging.
// It could be worth another look. Anyway, that's why that isn't done here. Probably the
// best place to elide jumps will be in semantic analysis, by inlining blocks that only
// have 1 break instruction.
const s32_amt = math.cast(i32, amt) catch
return self.fail(src, "unable to perform relocation: jump too far", .{});
mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt);
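
Side note (not part of the diff): `performReloc` above back-patches the 4 displacement bytes reserved in genCondBr/genX86CondBr; the displacement is counted from the end of those 4 bytes to the current end of the code buffer. A minimal standalone sketch of that patch step; the jne opcode and nop filler are illustrative:

const std = @import("std");
const mem = std.mem;

pub fn main() !void {
    var code = std.ArrayList(u8).init(std.heap.page_allocator);
    defer code.deinit();

    // Emit a jne rel32 with a zeroed 4-byte placeholder displacement.
    try code.appendSlice(&[_]u8{ 0x0f, 0x85, 0, 0, 0, 0 });
    const pos = code.items.len - 4; // where the displacement bytes live

    // The then-branch body would be emitted here; three nops stand in for it.
    try code.appendSlice(&[_]u8{ 0x90, 0x90, 0x90 });

    // The displacement is measured from the end of the 4 placeholder bytes.
    const amt = code.items.len - (pos + 4); // 3
    const s32_amt = @intCast(i32, amt);
    mem.writeIntLittle(i32, code.items[pos..][0..4], s32_amt);

    std.debug.print("patched rel32 = {}\n", .{s32_amt});
}
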
@@ -2282,8 +2394,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}

fn resolveInst(self: *Self, inst: *ir.Inst) !MCValue {
// If the type has no codegen bits, no need to store it.
if (!inst.ty.hasCodeGenBits())
return MCValue.none;

// Constants have static lifetimes, so they are always memoized in the outer most table.
if (inst.cast(ir.Inst.Constant)) |const_inst| {
if (inst.castTag(.constant)) |const_inst| {
const branch = &self.branch_stack.items[0];
const gop = try branch.inst_table.getOrPut(self.gpa, inst);
if (!gop.found_existing) {
@@ -2292,6 +2408,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return gop.entry.value;
}

return self.getResolvedInstValue(inst);
}

fn getResolvedInstValue(self: *Self, inst: *ir.Inst) MCValue {
// Treat each stack item as a "layer" on top of the previous one.
var i: usize = self.branch_stack.items.len;
while (true) {
@@ -17,6 +17,7 @@ const Type = @import("../type.zig").Type;
const link = @import("../link.zig");
const File = link.File;
const Elf = @This();
const build_options = @import("build_options");

const default_entry_addr = 0x8000000;

@@ -1640,9 +1641,15 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void {
else => false,
};
if (is_fn) {
//if (mem.eql(u8, mem.spanZ(decl.name), "add")) {
// typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*);
//}
const zir_dumps = if (std.builtin.is_test) &[0][]const u8{} else build_options.zir_dumps;
if (zir_dumps.len != 0) {
for (zir_dumps) |fn_name| {
if (mem.eql(u8, mem.spanZ(decl.name), fn_name)) {
std.debug.print("\n{}\n", .{decl.name});
typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*);
}
}
}

// For functions we need to add a prologue to the debug line program.
try dbg_line_buffer.ensureCapacity(26);
@@ -771,8 +771,8 @@ pub const Type = extern union {
.array => self.elemType().hasCodeGenBits() and self.arrayLen() != 0,
.array_u8 => self.arrayLen() != 0,
.array_sentinel, .single_const_pointer, .single_mut_pointer, .many_const_pointer, .many_mut_pointer, .c_const_pointer, .c_mut_pointer, .const_slice, .mut_slice, .pointer => self.elemType().hasCodeGenBits(),
.int_signed => self.cast(Payload.IntSigned).?.bits == 0,
.int_unsigned => self.cast(Payload.IntUnsigned).?.bits == 0,
.int_signed => self.cast(Payload.IntSigned).?.bits != 0,
.int_unsigned => self.cast(Payload.IntUnsigned).?.bits != 0,

.error_union => {
const payload = self.cast(Payload.ErrorUnion).?;
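
Side note (not part of the diff): the `== 0` to `!= 0` change above fixes an inverted condition; an integer type has code-generation bits exactly when its bit count is nonzero, so `u0` (like `void`) takes no storage while `u8` does. A trivial check of that fact:

const std = @import("std");

pub fn main() void {
    std.debug.print("u0: {} bits, u8: {} bits\n", .{ @bitSizeOf(u0), @bitSizeOf(u8) });
}
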
@@ -954,6 +954,7 @@ pub const Module = struct {

pub const MetaData = struct {
deaths: ir.Inst.DeathsInt,
addr: usize,
};

pub const BodyMetaData = struct {
@@ -1152,6 +1153,12 @@ const Writer = struct {
try self.writeInstToStream(stream, inst);
if (self.module.metadata.get(inst)) |metadata| {
try stream.print(" ; deaths=0b{b}", .{metadata.deaths});
// This is conditionally compiled in because addresses mess up the tests due
// to Address Space Layout Randomization. It's super useful when debugging
// codegen.zig though.
if (!std.builtin.is_test) {
try stream.print(" 0x{x}", .{metadata.addr});
}
}
self.indent -= 2;
try stream.writeByte('\n');
@@ -2417,7 +2424,10 @@ const EmitZIR = struct {

.varptr => @panic("TODO"),
};
try self.metadata.put(new_inst, .{ .deaths = inst.deaths });
try self.metadata.put(new_inst, .{
.deaths = inst.deaths,
.addr = @ptrToInt(inst),
});
try instructions.append(new_inst);
try inst_table.put(inst, new_inst);
}
@@ -694,6 +694,68 @@ pub fn addCases(ctx: *TestContext) !void {
"",
);

// Reusing the registers of dead operands playing nicely with conditional branching.
case.addCompareOutput(
\\export fn _start() noreturn {
\\ assert(add(3, 4) == 791);
\\ assert(add(4, 3) == 79);
\\
\\ exit();
\\}
\\
\\fn add(a: u32, b: u32) u32 {
\\ const x: u32 = if (a < b) blk: {
\\ const c = a + b; // 7
\\ const d = a + c; // 10
\\ const e = d + b; // 14
\\ const f = d + e; // 24
\\ const g = e + f; // 38
\\ const h = f + g; // 62
\\ const i = g + h; // 100
\\ const j = i + d; // 110
\\ const k = i + j; // 210
\\ const l = k + c; // 217
\\ const m = l + d; // 227
\\ const n = m + e; // 241
\\ const o = n + f; // 265
\\ const p = o + g; // 303
\\ const q = p + h; // 365
\\ const r = q + i; // 465
\\ const s = r + j; // 575
\\ const t = s + k; // 785
\\ break :blk t;
\\ } else blk: {
\\ const t = b + b + a; // 10
\\ const c = a + t; // 14
\\ const d = c + t; // 24
\\ const e = d + t; // 34
\\ const f = e + t; // 44
\\ const g = f + t; // 54
\\ const h = c + g; // 68
\\ break :blk h + b; // 71
\\ };
\\ const y = x + a; // 788, 75
\\ const z = y + a; // 791, 79
\\ return z;
\\}
\\
\\pub fn assert(ok: bool) void {
\\ if (!ok) unreachable; // assertion failure
\\}
\\
\\fn exit() noreturn {
\\ asm volatile ("syscall"
\\ :
\\ : [number] "{rax}" (231),
\\ [arg1] "{rdi}" (0)
\\ : "rcx", "r11", "memory"
\\ );
\\ unreachable;
\\}
,
"",
);

// Character literals and multiline strings.
case.addCompareOutput(
\\export fn _start() noreturn {