Merge pull request #10664 from ziglang/stage2-x86_64-refactor-air-call

stage2: refactor how we preserve callee regs and how we pass args on the stack in x86_64 backend
This commit is contained in:
Jakub Konka 2022-01-23 00:01:12 +01:00 committed by GitHub
commit 081ce09575
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 194 additions and 245 deletions

View File

@ -43,7 +43,7 @@ err_msg: ?*ErrorMsg,
args: []MCValue,
ret_mcv: MCValue,
fn_type: Type,
arg_index: usize,
arg_index: u32,
src_loc: Module.SrcLoc,
stack_align: u32,
@ -61,8 +61,6 @@ end_di_column: u32,
/// which is a relative jump, based on the address following the reloc.
exitlude_jump_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
stack_args_relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .{},
/// Whenever there is a runtime branch, we push a Branch onto this stack,
/// and pop it off when the runtime branch joins. This provides an "overlay"
/// of the table of mappings from instructions to `MCValue` from within the branch.
@ -119,9 +117,9 @@ pub const MCValue = union(enum) {
memory: u64,
/// The value is one of the stack variables.
/// If the type is a pointer, it means the pointer address is in the stack at this offset.
stack_offset: u32,
stack_offset: i32,
/// The value is a pointer to one of the stack variables (payload is stack offset).
ptr_stack_offset: u32,
ptr_stack_offset: i32,
/// The value is in the compare flags assuming an unsigned operation,
/// with this operator applied on top of it.
compare_flags_unsigned: math.CompareOperator,
@ -286,7 +284,6 @@ pub fn generate(
defer function.exitlude_jump_relocs.deinit(bin_file.allocator);
defer function.mir_instructions.deinit(bin_file.allocator);
defer function.mir_extra.deinit(bin_file.allocator);
defer function.stack_args_relocs.deinit(bin_file.allocator);
defer if (builtin.mode == .Debug) function.mir_to_air_map.deinit();
var call_info = function.resolveCallingConventionValues(fn_type) catch |err| switch (err) {
@ -378,13 +375,6 @@ pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 {
fn gen(self: *Self) InnerError!void {
const cc = self.fn_type.fnCallingConvention();
if (cc != .Naked) {
// push the callee_preserved_regs that were used
const backpatch_push_callee_preserved_regs_i = try self.addInst(.{
.tag = .push_regs_from_callee_preserved_regs,
.ops = undefined,
.data = .{ .regs_to_push_or_pop = undefined }, // to be backpatched
});
_ = try self.addInst(.{
.tag = .push,
.ops = (Mir.Ops{
@ -416,6 +406,15 @@ fn gen(self: *Self) InnerError!void {
.data = undefined,
});
// push the callee_preserved_regs that were used
const backpatch_push_callee_preserved_regs_i = try self.addInst(.{
.tag = .push_regs_from_callee_preserved_regs,
.ops = (Mir.Ops{
.reg1 = .rbp,
}).encode(),
.data = .{ .payload = undefined }, // to be backpatched
});
try self.genBody(self.air.getMainBody());
// TODO can single exitlude jump reloc be elided? What if it is not at the end of the code?
@ -429,6 +428,33 @@ fn gen(self: *Self) InnerError!void {
self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len);
}
// calculate the data for callee_preserved_regs to be pushed and popped
const callee_preserved_regs_payload = blk: {
var data = Mir.RegsToPushOrPop{
.regs = 0,
.disp = mem.alignForwardGeneric(u32, self.next_stack_offset, 8),
};
inline for (callee_preserved_regs) |reg, i| {
if (self.register_manager.isRegAllocated(reg)) {
data.regs |= 1 << @intCast(u5, i);
self.max_end_stack += 8;
}
}
break :blk try self.addExtra(data);
};
const data = self.mir_instructions.items(.data);
// backpatch the push instruction
data[backpatch_push_callee_preserved_regs_i].payload = callee_preserved_regs_payload;
// pop the callee_preserved_regs
_ = try self.addInst(.{
.tag = .pop_regs_from_callee_preserved_regs,
.ops = (Mir.Ops{
.reg1 = .rbp,
}).encode(),
.data = .{ .payload = callee_preserved_regs_payload },
});
_ = try self.addInst(.{
.tag = .dbg_epilogue_begin,
.ops = undefined,
@ -450,34 +476,6 @@ fn gen(self: *Self) InnerError!void {
.data = undefined,
});
// calculate the data for callee_preserved_regs to be pushed and popped
var callee_preserved_regs_push_data: u32 = 0x0;
// TODO this is required on macOS since macOS actively checks for stack alignment
// at every extern call site. As far as I can tell, macOS accounts for the typical
// function prologue first 2 instructions of:
// ...
// push rbp
// mov rsp, rbp
// ...
// Thus we don't need to adjust the stack for the first push instruction. However,
// any subsequent push of values on the stack such as when preserving registers,
// needs to be taken into account here.
var stack_adjustment: u32 = 0;
inline for (callee_preserved_regs) |reg, i| {
if (self.register_manager.isRegAllocated(reg)) {
callee_preserved_regs_push_data |= 1 << @intCast(u5, i);
stack_adjustment += @divExact(reg.size(), 8);
}
}
const data = self.mir_instructions.items(.data);
// backpatch the push instruction
data[backpatch_push_callee_preserved_regs_i].regs_to_push_or_pop = callee_preserved_regs_push_data;
// pop the callee_preserved_regs
_ = try self.addInst(.{
.tag = .pop_regs_from_callee_preserved_regs,
.ops = undefined,
.data = .{ .regs_to_push_or_pop = callee_preserved_regs_push_data },
});
_ = try self.addInst(.{
.tag = .ret,
.ops = (Mir.Ops{
@ -487,37 +485,28 @@ fn gen(self: *Self) InnerError!void {
});
// Adjust the stack
const stack_end = self.max_end_stack;
if (stack_end > math.maxInt(i32) - stack_adjustment) {
if (self.max_end_stack > math.maxInt(i32)) {
return self.failSymbol("too much stack used in call parameters", .{});
}
// TODO we should reuse this mechanism to align the stack when calling any function even if
// we do not pass any args on the stack BUT we still push regs to stack with `push` inst.
const aligned_stack_end = @intCast(u32, mem.alignForward(stack_end, self.stack_align));
if (aligned_stack_end > 0 or (stack_adjustment > 0 and self.target.isDarwin())) {
const imm = if (self.target.isDarwin()) aligned_stack_end + stack_adjustment else aligned_stack_end;
const aligned_stack_end = @intCast(u32, mem.alignForward(self.max_end_stack, self.stack_align));
if (aligned_stack_end > 0) {
self.mir_instructions.set(backpatch_stack_sub, .{
.tag = .sub,
.ops = (Mir.Ops{
.reg1 = .rsp,
}).encode(),
.data = .{ .imm = imm },
.data = .{ .imm = aligned_stack_end },
});
self.mir_instructions.set(backpatch_stack_add, .{
.tag = .add,
.ops = (Mir.Ops{
.reg1 = .rsp,
}).encode(),
.data = .{ .imm = imm },
.data = .{ .imm = aligned_stack_end },
});
}
while (self.stack_args_relocs.popOrNull()) |index| {
// TODO like above, gotta figure out the alignment shenanigans for macOS, etc.
const adjustment = if (self.target.isDarwin()) 2 * stack_adjustment else stack_adjustment;
// +16 bytes to account for saved return address of the `call` instruction and
// `push rbp`.
self.mir_instructions.items(.data)[index].imm += adjustment + aligned_stack_end + 16;
}
} else {
_ = try self.addInst(.{
.tag = .dbg_prologue_end,
@ -808,7 +797,7 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
}
}
const stack_offset = try self.allocMem(inst, abi_size, abi_align);
return MCValue{ .stack_offset = stack_offset };
return MCValue{ .stack_offset = @intCast(i32, stack_offset) };
}
pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void {
@ -854,12 +843,12 @@ fn copyToNewRegisterWithExceptions(
fn airAlloc(self: *Self, inst: Air.Inst.Index) !void {
const stack_offset = try self.allocMemPtr(inst);
return self.finishAir(inst, .{ .ptr_stack_offset = stack_offset }, .{ .none, .none, .none });
return self.finishAir(inst, .{ .ptr_stack_offset = @intCast(i32, stack_offset) }, .{ .none, .none, .none });
}
fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void {
const stack_offset = try self.allocMemPtr(inst);
return self.finishAir(inst, .{ .ptr_stack_offset = stack_offset }, .{ .none, .none, .none });
return self.finishAir(inst, .{ .ptr_stack_offset = @intCast(i32, stack_offset) }, .{ .none, .none, .none });
}
fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
@ -1419,7 +1408,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
.reg1 = addr_reg.to64(),
.reg2 = .rbp,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, off + array_abi_size)) },
.data = .{ .imm = @bitCast(u32, -(off + @intCast(i32, array_abi_size))) },
});
},
else => return self.fail("TODO implement array_elem_val when array is {}", .{array}),
@ -1623,7 +1612,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
try self.genSetReg(Type.initTag(.u32), count_reg, .{ .immediate = @intCast(u32, abi_size) });
return self.genInlineMemcpy(
@bitCast(u32, -@intCast(i32, off + abi_size)),
-(off + @intCast(i32, abi_size)),
.rbp,
registerAlias(addr_reg, @divExact(reg.size(), 8)),
count_reg.to64(),
@ -1780,10 +1769,10 @@ fn structFieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, inde
return if (self.liveness.isUnused(inst)) .dead else result: {
const mcv = try self.resolveInst(operand);
const struct_ty = self.air.typeOf(operand).childType();
const struct_size = @intCast(u32, struct_ty.abiSize(self.target.*));
const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*));
const struct_size = @intCast(i32, struct_ty.abiSize(self.target.*));
const struct_field_offset = @intCast(i32, struct_ty.structFieldOffset(index, self.target.*));
const struct_field_ty = struct_ty.structFieldType(index);
const struct_field_size = @intCast(u32, struct_field_ty.abiSize(self.target.*));
const struct_field_size = @intCast(i32, struct_field_ty.abiSize(self.target.*));
switch (mcv) {
.ptr_stack_offset => |off| {
@ -1803,10 +1792,10 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: {
const mcv = try self.resolveInst(operand);
const struct_ty = self.air.typeOf(operand);
const struct_size = @intCast(u32, struct_ty.abiSize(self.target.*));
const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*));
const struct_size = @intCast(i32, struct_ty.abiSize(self.target.*));
const struct_field_offset = @intCast(i32, struct_ty.structFieldOffset(index, self.target.*));
const struct_field_ty = struct_ty.structFieldType(index);
const struct_field_size = @intCast(u32, struct_field_ty.abiSize(self.target.*));
const struct_field_size = @intCast(i32, struct_field_ty.abiSize(self.target.*));
switch (mcv) {
.stack_offset => |off| {
@ -1970,7 +1959,7 @@ fn genBinMathOpMir(
return self.fail("stack offset too large", .{});
}
const abi_size = dst_ty.abiSize(self.target.*);
const adj_off = off + abi_size;
const adj_off = off + @intCast(i32, abi_size);
_ = try self.addInst(.{
.tag = mir_tag,
.ops = (Mir.Ops{
@ -1978,7 +1967,7 @@ fn genBinMathOpMir(
.reg2 = .rbp,
.flags = 0b01,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, adj_off)) },
.data = .{ .imm = @bitCast(u32, -adj_off) },
});
},
.compare_flags_unsigned => {
@ -1997,7 +1986,7 @@ fn genBinMathOpMir(
if (abi_size > 8) {
return self.fail("TODO implement ADD/SUB/CMP for stack dst with large ABI", .{});
}
const adj_off = off + abi_size;
const adj_off = off + @intCast(i32, abi_size);
switch (src_mcv) {
.none => unreachable,
@ -2013,7 +2002,7 @@ fn genBinMathOpMir(
.reg2 = registerAlias(src_reg, @intCast(u32, abi_size)),
.flags = 0b10,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, adj_off)) },
.data = .{ .imm = @bitCast(u32, -adj_off) },
});
},
.immediate => |imm| {
@ -2034,7 +2023,7 @@ fn genBinMathOpMir(
else => unreachable,
};
const payload = try self.addExtra(Mir.ImmPair{
.dest_off = @bitCast(u32, -@intCast(i32, adj_off)),
.dest_off = @bitCast(u32, -adj_off),
.operand = @truncate(u32, imm),
});
_ = try self.addInst(.{
@ -2172,7 +2161,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
const mcv = self.args[arg_index];
const payload = try self.addExtra(Mir.ArgDbgInfo{
.air_inst = inst,
.arg_index = @truncate(u32, arg_index), // TODO can arg_index: u32?
.arg_index = arg_index,
});
_ = try self.addInst(.{
.tag = .arg_dbg_info,
@ -2188,58 +2177,13 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
self.register_manager.getRegAssumeFree(reg.to64(), inst);
break :blk mcv;
},
.stack_offset => |off| {
.stack_offset => {
const ty = self.air.typeOfIndex(inst);
const abi_size = ty.abiSize(self.target.*);
if (abi_size <= 8) {
const reg = try self.register_manager.allocReg(inst, &.{});
const reloc = try self.addInst(.{
.tag = .mov,
.ops = (Mir.Ops{
.reg1 = registerAlias(reg, @intCast(u32, abi_size)),
.reg2 = .rsp,
.flags = 0b01,
}).encode(),
.data = .{ .imm = off },
});
try self.stack_args_relocs.append(self.bin_file.allocator, reloc);
break :blk .{ .register = reg };
}
// TODO copy elision
const dst_mcv = try self.allocRegOrMem(inst, false);
const regs = try self.register_manager.allocRegs(3, .{ null, null, null }, &.{ .rax, .rcx });
const addr_reg = regs[0];
const count_reg = regs[1];
const tmp_reg = regs[2];
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rcx, null);
const reloc = try self.addInst(.{
.tag = .lea,
.ops = (Mir.Ops{
.reg1 = addr_reg.to64(),
.reg2 = .rsp,
}).encode(),
.data = .{ .imm = off },
});
try self.stack_args_relocs.append(self.bin_file.allocator, reloc);
// TODO allow for abi_size to be u64
try self.genSetReg(Type.initTag(.u32), count_reg, .{ .immediate = @intCast(u32, abi_size) });
try self.genInlineMemcpy(
@bitCast(u32, -@intCast(i32, dst_mcv.stack_offset + abi_size)),
.rbp,
addr_reg.to64(),
count_reg.to64(),
tmp_reg.to8(),
);
break :blk dst_mcv;
const off = @intCast(i32, (arg_index + 1) * abi_size) + 16;
break :blk MCValue{ .stack_offset = -off };
},
else => unreachable,
else => return self.fail("TODO implement arg for {}", .{mcv}),
}
};
@ -2264,64 +2208,6 @@ fn airFence(self: *Self) !void {
//return self.finishAirBookkeeping();
}
/// Emits MIR that writes `mcv` (a value of type `ty`) into an `rsp`-relative slot
/// used for passing a call argument on the stack.
/// The slot's displacement is computed as `-(stack_offset + abi_size)` and encoded
/// as a two's complement `u32` immediate.
/// `.unreach` and `.none` emit nothing; `.dead` and `.ptr_embedded_in_code` are
/// treated as impossible here; any MCValue kind not listed fails with a TODO error.
fn genSetStackArg(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void {
const abi_size = ty.abiSize(self.target.*);
switch (mcv) {
.dead => unreachable,
.ptr_embedded_in_code => unreachable,
.unreach, .none => return,
.register => |reg| {
// NOTE(review): flags 0b10 presumably selects the "store register to memory"
// form of `mov`, with `rsp` as the base register — confirm against Mir/Emit.
_ = try self.addInst(.{
.tag = .mov,
.ops = (Mir.Ops{
.reg1 = .rsp,
.reg2 = registerAlias(reg, @intCast(u32, abi_size)),
.flags = 0b10,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, stack_offset + abi_size)) },
});
},
.ptr_stack_offset => {
// Materialize the value in a temporary register, then reuse the register path.
const reg = try self.copyToTmpRegister(ty, mcv);
return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
},
.stack_offset => |unadjusted_off| {
// Values of at most 8 bytes go through a temporary register.
if (abi_size <= 8) {
const reg = try self.copyToTmpRegister(ty, mcv);
return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
}
// Larger values are copied with an inline memcpy, which needs three scratch registers.
// NOTE(review): the trailing list presumably excludes rax/rcx from allocation — confirm.
const regs = try self.register_manager.allocRegs(3, .{ null, null, null }, &.{ .rax, .rcx });
const addr_reg = regs[0];
const count_reg = regs[1];
const tmp_reg = regs[2];
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rcx, null);
// Load the source address, an rbp-relative displacement of -(unadjusted_off + abi_size).
_ = try self.addInst(.{
.tag = .lea,
.ops = (Mir.Ops{
.reg1 = addr_reg.to64(),
.reg2 = .rbp,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, unadjusted_off + abi_size)) },
});
// TODO allow for abi_size to be u64
// The byte count for the copy is loaded into count_reg as a u32 immediate.
try self.genSetReg(Type.initTag(.u32), count_reg, .{ .immediate = @intCast(u32, abi_size) });
// Destination is rsp-relative, using the same negated displacement as the register path.
try self.genInlineMemcpy(
@bitCast(u32, -@intCast(i32, stack_offset + abi_size)),
.rsp,
addr_reg.to64(),
count_reg.to64(),
tmp_reg.to8(),
);
},
else => return self.fail("TODO implement args on stack for {}", .{mcv}),
}
}
fn airCall(self: *Self, inst: Air.Inst.Index) !void {
const pl_op = self.air.instructions.items(.data)[inst].pl_op;
const callee = pl_op.operand;
@ -2338,12 +2224,9 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void {
var info = try self.resolveCallingConventionValues(fn_ty);
defer info.deinit(self);
var count: usize = info.args.len;
var stack_adjustment: u32 = 0;
while (count > 0) : (count -= 1) {
const arg_i = count - 1;
for (args) |arg, arg_i| {
const mc_arg = info.args[arg_i];
const arg = args[arg_i];
const arg_ty = self.air.typeOf(arg);
const arg_mcv = try self.resolveInst(args[arg_i]);
// Here we do not use setRegOrMem even though the logic is similar, because
@ -2355,9 +2238,9 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void {
try self.genSetReg(arg_ty, reg, arg_mcv);
},
.stack_offset => |off| {
const abi_size = arg_ty.abiSize(self.target.*);
const abi_size = @intCast(u32, arg_ty.abiSize(self.target.*));
try self.genSetStackArg(arg_ty, off, arg_mcv);
stack_adjustment += @intCast(u32, abi_size);
stack_adjustment += abi_size;
},
.ptr_stack_offset => {
return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{});
@ -3269,7 +3152,65 @@ fn setRegOrMem(self: *Self, ty: Type, loc: MCValue, val: MCValue) !void {
}
}
fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void {
/// Emits MIR that writes `mcv` (a value of type `ty`) into an `rsp`-relative slot
/// used for passing a call argument on the stack.
/// `stack_offset` is signed; the slot's displacement is computed as
/// `-(stack_offset + abi_size)` and bit-cast to a `u32` immediate where needed.
/// `.unreach` and `.none` emit nothing; `.dead` and `.ptr_embedded_in_code` are
/// treated as impossible here; any MCValue kind not listed fails with a TODO error.
fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerError!void {
const abi_size = ty.abiSize(self.target.*);
switch (mcv) {
.dead => unreachable,
.ptr_embedded_in_code => unreachable,
.unreach, .none => return,
.register => |reg| {
// NOTE(review): flags 0b10 presumably selects the "store register to memory"
// form of `mov`, with `rsp` as the base register — confirm against Mir/Emit.
_ = try self.addInst(.{
.tag = .mov,
.ops = (Mir.Ops{
.reg1 = .rsp,
.reg2 = registerAlias(reg, @intCast(u32, abi_size)),
.flags = 0b10,
}).encode(),
.data = .{ .imm = @bitCast(u32, -(stack_offset + @intCast(i32, abi_size))) },
});
},
.ptr_stack_offset => {
// Materialize the value in a temporary register, then reuse the register path.
const reg = try self.copyToTmpRegister(ty, mcv);
return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
},
.stack_offset => |unadjusted_off| {
// Values of at most 8 bytes go through a temporary register.
if (abi_size <= 8) {
const reg = try self.copyToTmpRegister(ty, mcv);
return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
}
// Larger values are copied with an inline memcpy, which needs three scratch registers.
// NOTE(review): the trailing list presumably excludes rax/rcx from allocation — confirm.
const regs = try self.register_manager.allocRegs(3, .{ null, null, null }, &.{ .rax, .rcx });
const addr_reg = regs[0];
const count_reg = regs[1];
const tmp_reg = regs[2];
try self.register_manager.getReg(.rax, null);
try self.register_manager.getReg(.rcx, null);
// Load the source address, an rbp-relative displacement of -(unadjusted_off + abi_size).
_ = try self.addInst(.{
.tag = .lea,
.ops = (Mir.Ops{
.reg1 = addr_reg.to64(),
.reg2 = .rbp,
}).encode(),
.data = .{ .imm = @bitCast(u32, -(unadjusted_off + @intCast(i32, abi_size))) },
});
// TODO allow for abi_size to be u64
// The byte count for the copy is loaded into count_reg as a u32 immediate.
try self.genSetReg(Type.initTag(.u32), count_reg, .{ .immediate = @intCast(u32, abi_size) });
// Destination is rsp-relative, using the same negated displacement as the register path.
try self.genInlineMemcpy(
-(stack_offset + @intCast(i32, abi_size)),
.rsp,
addr_reg.to64(),
count_reg.to64(),
tmp_reg.to8(),
);
},
else => return self.fail("TODO implement args on stack for {}", .{mcv}),
}
}
fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerError!void {
switch (mcv) {
.dead => unreachable,
.ptr_embedded_in_code => unreachable,
@ -3296,7 +3237,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
},
.immediate => |x_big| {
const abi_size = ty.abiSize(self.target.*);
const adj_off = stack_offset + abi_size;
const adj_off = stack_offset + @intCast(i32, abi_size);
if (adj_off > 128) {
return self.fail("TODO implement set stack variable with large stack offset", .{});
}
@ -3306,7 +3247,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
// offset from rbp, which is at the top of the stack frame.
// mov [rbp+offset], immediate
const payload = try self.addExtra(Mir.ImmPair{
.dest_off = @bitCast(u32, -@intCast(i32, adj_off)),
.dest_off = @bitCast(u32, -adj_off),
.operand = @truncate(u32, x_big),
});
_ = try self.addInst(.{
@ -3326,7 +3267,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
8 => {
// We have a positive stack offset value but we want a twos complement negative
// offset from rbp, which is at the top of the stack frame.
const negative_offset = -@intCast(i32, adj_off);
const negative_offset = -adj_off;
// 64 bit write to memory would take two mov's anyways so we
// instead just use two 32 bit writes to avoid register allocation
@ -3369,7 +3310,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
return self.fail("stack offset too large", .{});
}
const abi_size = ty.abiSize(self.target.*);
const adj_off = stack_offset + abi_size;
const adj_off = stack_offset + @intCast(i32, abi_size);
_ = try self.addInst(.{
.tag = .mov,
.ops = (Mir.Ops{
@ -3377,7 +3318,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
.reg2 = registerAlias(reg, @intCast(u32, abi_size)),
.flags = 0b10,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, adj_off)) },
.data = .{ .imm = @bitCast(u32, -adj_off) },
});
},
.memory, .embedded_in_code => {
@ -3403,7 +3344,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
}
const regs = try self.register_manager.allocRegs(3, .{ null, null, null }, &.{ .rax, .rcx });
const regs = try self.register_manager.allocRegs(3, .{ null, null, null }, &.{ .rax, .rcx, .rbp });
const addr_reg = regs[0];
const count_reg = regs[1];
const tmp_reg = regs[2];
@ -3417,14 +3358,14 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
.reg1 = addr_reg.to64(),
.reg2 = .rbp,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, off + abi_size)) },
.data = .{ .imm = @bitCast(u32, -(off + @intCast(i32, abi_size))) },
});
// TODO allow for abi_size to be u64
try self.genSetReg(Type.initTag(.u32), count_reg, .{ .immediate = @intCast(u32, abi_size) });
return self.genInlineMemcpy(
@bitCast(u32, -@intCast(i32, stack_offset + abi_size)),
-(stack_offset + @intCast(i32, abi_size)),
.rbp,
addr_reg.to64(),
count_reg.to64(),
@ -3436,7 +3377,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
fn genInlineMemcpy(
self: *Self,
stack_offset: u32,
stack_offset: i32,
stack_reg: Register,
addr_reg: Register,
count_reg: Register,
@ -3494,7 +3435,7 @@ fn genInlineMemcpy(
.reg1 = stack_reg,
.reg2 = tmp_reg.to8(),
}).encode(),
.data = .{ .imm = stack_offset },
.data = .{ .imm = @bitCast(u32, stack_offset) },
});
// add rcx, 1
@ -3535,14 +3476,14 @@ fn genInlineMemcpy(
try self.performReloc(loop_reloc);
}
fn genInlineMemset(self: *Self, ty: Type, stack_offset: u32, value: MCValue) InnerError!void {
fn genInlineMemset(self: *Self, ty: Type, stack_offset: i32, value: MCValue) InnerError!void {
try self.register_manager.getReg(.rax, null);
const abi_size = ty.abiSize(self.target.*);
const adj_off = stack_offset + abi_size;
const adj_off = stack_offset + @intCast(i32, abi_size);
if (adj_off > 128) {
return self.fail("TODO inline memset with large stack offset", .{});
}
const negative_offset = @bitCast(u32, -@intCast(i32, adj_off));
const negative_offset = @bitCast(u32, -adj_off);
// We are actually counting `abi_size` bytes; however, we reuse the index register
// as both the counter and offset scaler, hence we need to subtract one from `abi_size`
@ -3633,7 +3574,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
const ptr_abi_size = ty.abiSize(self.target.*);
const elem_ty = ty.childType();
const elem_abi_size = elem_ty.abiSize(self.target.*);
const off = unadjusted_off + elem_abi_size;
const off = unadjusted_off + @intCast(i32, elem_abi_size);
if (off < std.math.minInt(i32) or off > std.math.maxInt(i32)) {
return self.fail("stack offset too large", .{});
}
@ -3643,7 +3584,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
.reg1 = registerAlias(reg, @intCast(u32, ptr_abi_size)),
.reg2 = .rbp,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, off)) },
.data = .{ .imm = @bitCast(u32, -off) },
});
},
.ptr_embedded_in_code => unreachable,
@ -3830,7 +3771,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
},
.stack_offset => |unadjusted_off| {
const abi_size = ty.abiSize(self.target.*);
const off = unadjusted_off + abi_size;
const off = unadjusted_off + @intCast(i32, abi_size);
if (off < std.math.minInt(i32) or off > std.math.maxInt(i32)) {
return self.fail("stack offset too large", .{});
}
@ -3841,7 +3782,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
.reg2 = .rbp,
.flags = 0b01,
}).encode(),
.data = .{ .imm = @bitCast(u32, -@intCast(i32, off)) },
.data = .{ .imm = @bitCast(u32, -off) },
});
},
}
@ -3866,7 +3807,7 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
const array_ty = ptr_ty.childType();
const array_len = array_ty.arrayLenIncludingSentinel();
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else blk: {
const stack_offset = try self.allocMem(inst, 16, 16);
const stack_offset = @intCast(i32, try self.allocMem(inst, 16, 16));
try self.genSetStack(ptr_ty, stack_offset + 8, ptr);
try self.genSetStack(Type.initTag(.u64), stack_offset, .{ .immediate = array_len });
break :blk .{ .stack_offset = stack_offset };
@ -4247,6 +4188,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
var next_stack_offset: u32 = 0;
var count: usize = param_types.len;
while (count > 0) : (count -= 1) {
// for (param_types) |ty, i| {
const i = count - 1;
const ty = param_types[i];
if (!ty.hasCodeGenBits()) {
@ -4265,7 +4207,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues {
// such as ptr and len of slices as separate registers.
// TODO: also we need to honor the C ABI for relevant types rather than passing on
// the stack here.
result.args[i] = .{ .stack_offset = next_stack_offset };
result.args[i] = .{ .stack_offset = @intCast(i32, next_stack_offset) };
next_stack_offset += param_size;
}
}

View File

@ -251,23 +251,25 @@ fn mirPushPop(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
}
}
fn mirPushPopRegsFromCalleePreservedRegs(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void {
const callee_preserved_regs = bits.callee_preserved_regs;
const regs = emit.mir.instructions.items(.data)[inst].regs_to_push_or_pop;
if (tag == .push) {
for (callee_preserved_regs) |reg, i| {
if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
lowerToOEnc(.push, reg, emit.code) catch |err|
return emit.failWithLoweringError(err);
}
} else {
// pop in the reverse direction
var i = callee_preserved_regs.len;
while (i > 0) : (i -= 1) {
const reg = callee_preserved_regs[i - 1];
if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue;
lowerToOEnc(.pop, reg, emit.code) catch |err|
return emit.failWithLoweringError(err);
const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
const payload = emit.mir.instructions.items(.data)[inst].payload;
const data = emit.mir.extraData(Mir.RegsToPushOrPop, payload).data;
const regs = data.regs;
var disp: u32 = data.disp + 8;
for (bits.callee_preserved_regs) |reg, i| {
if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
if (tag == .push) {
lowerToMrEnc(.mov, RegisterOrMemory.mem(.qword_ptr, .{
.disp = @bitCast(u32, -@intCast(i32, disp)),
.base = ops.reg1,
}), reg.to64(), emit.code) catch |err| return emit.failWithLoweringError(err);
} else {
lowerToRmEnc(.mov, reg.to64(), RegisterOrMemory.mem(.qword_ptr, .{
.disp = @bitCast(u32, -@intCast(i32, disp)),
.base = ops.reg1,
}), emit.code) catch |err| return emit.failWithLoweringError(err);
}
disp += 8;
}
}
@ -1603,7 +1605,7 @@ fn lowerToRmEnc(
if (reg.size() != src_reg.size()) {
return error.OperandSizeMismatch;
}
const encoder = try Encoder.init(code, 3);
const encoder = try Encoder.init(code, 4);
encoder.rex(.{
.w = setRexWRegister(reg) or setRexWRegister(src_reg),
.r = reg.isExtended(),

View File

@ -333,8 +333,6 @@ pub const Inst = struct {
got_entry: u32,
/// Index into `extra`. Meaning of what can be found there is context-dependent.
payload: u32,
/// A bitfield of which callee_preserved_regs to push
regs_to_push_or_pop: u32,
};
// Make sure we don't accidentally make instructions bigger than expected.
@ -346,6 +344,11 @@ pub const Inst = struct {
}
};
/// Payload stored in `extra` for the `push_regs_from_callee_preserved_regs` and
/// `pop_regs_from_callee_preserved_regs` instructions.
pub const RegsToPushOrPop = struct {
/// Bitmask over `callee_preserved_regs`: bit `i` set means the register at
/// index `i` is saved (push) or restored (pop).
regs: u32,
/// Base displacement for the save area. The emitter starts at `disp + 8`,
/// advances by 8 per saved register, and applies the negated value relative
/// to the base register given in the instruction's `ops.reg1`.
disp: u32,
};
pub const ImmPair = struct {
dest_off: u32,
operand: u32,

View File

@ -180,26 +180,28 @@ fn mirPushPop(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: a
try w.writeByte('\n');
}
fn mirPushPopRegsFromCalleePreservedRegs(print: *const Print, tag: Mir.Inst.Tag, inst: Mir.Inst.Index, w: anytype) !void {
const callee_preserved_regs = bits.callee_preserved_regs;
// PUSH/POP reg
const regs = print.mir.instructions.items(.data)[inst].regs_to_push_or_pop;
if (regs == 0) return w.writeAll("push/pop no regs from callee_preserved_regs\n");
if (tag == .push) {
try w.writeAll("push ");
for (callee_preserved_regs) |reg, i| {
if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
try w.print("{s}, ", .{@tagName(reg)});
}
} else {
// pop in the reverse direction
var i = callee_preserved_regs.len;
try w.writeAll("pop ");
while (i > 0) : (i -= 1) {
if ((regs >> @intCast(u5, i - 1)) & 1 == 0) continue;
const reg = callee_preserved_regs[i - 1];
try w.print("{s}, ", .{@tagName(reg)});
const ops = Mir.Ops.decode(print.mir.instructions.items(.ops)[inst]);
const payload = print.mir.instructions.items(.data)[inst].payload;
const data = print.mir.extraData(Mir.RegsToPushOrPop, payload).data;
const regs = data.regs;
var disp: u32 = data.disp + 8;
if (regs == 0) return w.writeAll("no regs from callee_preserved_regs\n");
for (bits.callee_preserved_regs) |reg, i| {
if ((regs >> @intCast(u5, i)) & 1 == 0) continue;
if (tag == .push) {
try w.print("mov qword ptr [{s} + {d}], {s}", .{
@tagName(ops.reg1),
@bitCast(u32, -@intCast(i32, disp)),
@tagName(reg.to64()),
});
} else {
try w.print("mov {s}, qword ptr [{s} + {d}]", .{
@tagName(reg.to64()),
@tagName(ops.reg1),
@bitCast(u32, -@intCast(i32, disp)),
});
}
disp += 8;
}
try w.writeByte('\n');
}

View File

@ -2118,7 +2118,7 @@ fn allocateTextBlock(self: *Elf, block_list: *TextBlockList, text_block: *TextBl
const sym = self.local_symbols.items[big_block.local_sym_index];
const capacity = big_block.capacity(self.*);
const ideal_capacity = padToIdeal(capacity);
const ideal_capacity_end_vaddr = sym.st_value + ideal_capacity;
const ideal_capacity_end_vaddr = std.math.add(u64, sym.st_value, ideal_capacity) catch ideal_capacity;
const capacity_end_vaddr = sym.st_value + capacity;
const new_start_vaddr_unaligned = capacity_end_vaddr - new_block_ideal_capacity;
const new_start_vaddr = mem.alignBackwardGeneric(u64, new_start_vaddr_unaligned, alignment);

View File

@ -5064,7 +5064,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m
const sym = self.locals.items[big_atom.local_sym_index];
const capacity = big_atom.capacity(self.*);
const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity;
const ideal_capacity_end_vaddr = sym.n_value + ideal_capacity;
const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity;
const capacity_end_vaddr = sym.n_value + capacity;
const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity;
const new_start_vaddr = mem.alignBackwardGeneric(u64, new_start_vaddr_unaligned, alignment);