x86_64: implement stack probing

Jacob Young 2023-05-12 02:11:37 -04:00
parent 3681da25f8
commit f83ebd8e6c
4 changed files with 163 additions and 11 deletions
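
Background for the diff below: stack probing touches every page a prologue commits so the OS guard page is hit in order, which is presumably what allows the x86_64-on-Windows skip lines in the behavior tests at the end of this diff to be removed. The CodeGen hunk picks one of three shapes depending on the size of the stack adjustment. A minimal sketch of that selection, with an illustrative helper name; only `page_size` and the instruction cap come from the diff:

const page_size: u32 = 1 << 12;

/// Illustrative only; mirrors the branches patched into gen() below.
fn probeStrategy(stack_adjust: u32, unrolled_max_insts: u32) enum { plain_sub, unrolled_probes, probe_loop } {
    if (stack_adjust <= page_size) return .plain_sub; // at most one new page: plain `sub rsp, imm`, no probing
    if (stack_adjust < page_size * unrolled_max_insts) return .unrolled_probes; // a handful of pages: straight-line touches
    return .probe_loop; // large frames: setup + loop pseudo pair
}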


@@ -1550,7 +1550,9 @@ fn gen(self: *Self) InnerError!void {
const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp);
const backpatch_frame_align = try self.asmPlaceholder();
const backpatch_frame_align_extra = try self.asmPlaceholder();
const backpatch_stack_alloc = try self.asmPlaceholder();
const backpatch_stack_alloc_extra = try self.asmPlaceholder();
switch (self.ret_mcv.long) {
.none, .unreach => {},
@@ -1599,24 +1601,67 @@ fn gen(self: *Self) InnerError!void {
const need_stack_adjust = frame_layout.stack_adjust > 0;
const need_save_reg = frame_layout.save_reg_list.count() > 0;
if (need_frame_align) {
const page_align = @as(u32, math.maxInt(u32)) << 12;
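// Note: the masked `and` patched in below drops rsp by less than one page;
// when the requested alignment exceeds a page, the extra probe pseudo
// finishes the alignment one page at a time.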
self.mir_instructions.set(backpatch_frame_align, .{
.tag = .@"and",
.ops = .ri_s,
.data = .{ .ri = .{
.r1 = .rsp,
.i = @max(frame_layout.stack_mask, page_align),
} },
});
if (frame_layout.stack_mask < page_align) {
self.mir_instructions.set(backpatch_frame_align_extra, .{
.tag = .pseudo,
.ops = .pseudo_probe_align_ri_s,
.data = .{ .ri = .{
.r1 = .rsp,
.i = ~frame_layout.stack_mask & page_align,
} },
});
}
}
if (need_stack_adjust) {
const page_size: u32 = 1 << 12;
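// Up to one page needs no probing; a handful of pages gets unrolled page
// touches; anything larger uses the setup + loop pseudo pair.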
if (frame_layout.stack_adjust <= page_size) {
self.mir_instructions.set(backpatch_stack_alloc, .{
.tag = .sub,
.ops = .ri_s,
.data = .{ .ri = .{
.r1 = .rsp,
.i = frame_layout.stack_adjust,
} },
});
} else if (frame_layout.stack_adjust <
page_size * Lower.pseudo_probe_adjust_unrolled_max_insts)
{
self.mir_instructions.set(backpatch_stack_alloc, .{
.tag = .pseudo,
.ops = .pseudo_probe_adjust_unrolled_ri_s,
.data = .{ .ri = .{
.r1 = .rsp,
.i = frame_layout.stack_adjust,
} },
});
} else {
self.mir_instructions.set(backpatch_stack_alloc, .{
.tag = .pseudo,
.ops = .pseudo_probe_adjust_setup_rri_s,
.data = .{ .rri = .{
.r1 = .rsp,
.r2 = .rax,
.i = frame_layout.stack_adjust,
} },
});
self.mir_instructions.set(backpatch_stack_alloc_extra, .{
.tag = .pseudo,
.ops = .pseudo_probe_adjust_loop_rr,
.data = .{ .rr = .{
.r1 = .rsp,
.r2 = .rax,
} },
});
}
}
if (need_frame_align or need_stack_adjust) {
self.mir_instructions.set(backpatch_stack_dealloc, .{

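A quick worked example of the mask split above, assuming `stack_mask` is the usual `~(alignment - 1)` mask implied by the patched `and` (the 16 KiB alignment here is hypothetical):

const std = @import("std");

test "frame-align mask split (illustrative)" {
    const page_align: u32 = @as(u32, std.math.maxInt(u32)) << 12; // 0xFFFF_F000
    const stack_mask: u32 = ~@as(u32, (1 << 14) - 1); // 16 KiB alignment: 0xFFFF_C000
    try std.testing.expect(stack_mask < page_align); // so the probe-align pseudo is emitted
    try std.testing.expectEqual(@as(u32, 0x3000), ~stack_mask & page_align); // at most 12 KiB left to probe page by page
}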

@@ -9,19 +9,33 @@ result_insts_len: u8 = undefined,
result_relocs_len: u8 = undefined,
result_insts: [
std.mem.max(usize, &.{
1, // non-pseudo instructions
2, // cmovcc: cmovcc \ cmovcc
3, // setcc: setcc \ setcc \ logicop
2, // jcc: jcc \ jcc
pseudo_probe_align_insts,
pseudo_probe_adjust_unrolled_max_insts,
pseudo_probe_adjust_setup_insts,
pseudo_probe_adjust_loop_insts,
abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs
abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs
})
]Instruction = undefined,
result_relocs: [
std.mem.max(usize, &.{
1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
2, // jcc: jcc \ jcc
2, // test \ jcc \ probe \ sub \ jmp
1, // probe \ sub \ jcc
})
]Reloc = undefined,
pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp
pub const pseudo_probe_adjust_unrolled_max_insts =
pseudo_probe_adjust_setup_insts + pseudo_probe_adjust_loop_insts;
pub const pseudo_probe_adjust_setup_insts = 2; // mov \ sub
pub const pseudo_probe_adjust_loop_insts = 3; // probe \ sub \ jcc
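// Note: CodeGen picks the unrolled form only for adjustments below
// page_size * pseudo_probe_adjust_unrolled_max_insts, i.e. at most four page
// touches plus the final sub, never more than the five instructions the
// setup + loop pair would cost.
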
pub const Error = error{
OutOfMemory,
LowerFail,
@@ -62,6 +76,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
else => try lower.generic(inst),
.pseudo => switch (inst.ops) {
.pseudo_cmov_z_and_np_rr => {
assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rr.r2 },
.{ .reg = inst.data.rr.r1 },
@@ -72,6 +87,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_cmov_nz_or_p_rr => {
assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rr.r1 },
.{ .reg = inst.data.rr.r2 },
@@ -84,6 +100,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_cmov_nz_or_p_rm_sib,
.pseudo_cmov_nz_or_p_rm_rip,
=> {
assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rx.r1 },
.{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
@@ -94,6 +111,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_set_z_and_np_r => {
assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .setz, &.{
.{ .reg = inst.data.rr.r1 },
});
@@ -108,6 +126,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_set_z_and_np_m_sib,
.pseudo_set_z_and_np_m_rip,
=> {
assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setz, &.{
.{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
});
@@ -120,6 +139,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_set_nz_or_p_r => {
assert(inst.data.rr.fixes == ._);
try lower.emit(.none, .setnz, &.{
.{ .reg = inst.data.rr.r1 },
});
@@ -134,6 +154,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_set_nz_or_p_m_sib,
.pseudo_set_nz_or_p_m_rip,
=> {
assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setnz, &.{
.{ .mem = lower.mem(inst.ops, inst.data.rx.payload) },
});
@@ -146,6 +167,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_j_z_and_np_inst => {
assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{
.{ .imm = lower.reloc(.{ .inst = index + 1 }) },
});
@@ -154,6 +176,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_j_nz_or_p_inst => {
assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) },
});
@@ -162,6 +185,78 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
});
},
.pseudo_probe_align_ri_s => {
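// Lowers to a page-at-a-time alignment loop:
//   test r1, imm               ; any of the alignment bits still set?
//   jz   <past this pseudo>    ; already aligned: done
//   lea  r1, [r1 - page_size]
//   test dword ptr [r1], <32-bit r1>  ; touch the newly exposed page
//   jmp  <back to the test>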
try lower.emit(.none, .@"test", &.{
.{ .reg = inst.data.ri.r1 },
.{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) },
});
try lower.emit(.none, .jz, &.{
.{ .imm = lower.reloc(.{ .inst = index + 1 }) },
});
try lower.emit(.none, .lea, &.{
.{ .reg = inst.data.ri.r1 },
.{ .mem = Memory.sib(.qword, .{
.base = .{ .reg = inst.data.ri.r1 },
.disp = -page_size,
}) },
});
try lower.emit(.none, .@"test", &.{
.{ .mem = Memory.sib(.dword, .{
.base = .{ .reg = inst.data.ri.r1 },
}) },
.{ .reg = inst.data.ri.r1.to32() },
});
try lower.emit(.none, .jmp, &.{
.{ .imm = lower.reloc(.{ .inst = index }) },
});
assert(lower.result_insts_len == pseudo_probe_align_insts);
},
.pseudo_probe_adjust_unrolled_ri_s => {
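// Touch one dword per page below r1 without moving it, then apply the whole
// adjustment with a single sub.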
var offset = page_size;
while (offset < @bitCast(i32, inst.data.ri.i)) : (offset += page_size) {
try lower.emit(.none, .@"test", &.{
.{ .mem = Memory.sib(.dword, .{
.base = .{ .reg = inst.data.ri.r1 },
.disp = -offset,
}) },
.{ .reg = inst.data.ri.r1.to32() },
});
}
try lower.emit(.none, .sub, &.{
.{ .reg = inst.data.ri.r1 },
.{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) },
});
assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts);
},
.pseudo_probe_adjust_setup_rri_s => {
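// Load the total adjustment into the scratch register r2 and subtract it
// from r1 up front; the loop pseudo below walks back through the skipped
// pages, using r2 as the remaining byte count.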
try lower.emit(.none, .mov, &.{
.{ .reg = inst.data.rri.r2.to32() },
.{ .imm = Immediate.s(@bitCast(i32, inst.data.rri.i)) },
});
try lower.emit(.none, .sub, &.{
.{ .reg = inst.data.rri.r1 },
.{ .reg = inst.data.rri.r2 },
});
assert(lower.result_insts_len == pseudo_probe_adjust_setup_insts);
},
.pseudo_probe_adjust_loop_rr => {
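// Each iteration touches the page at r1 + r2 - page_size and drops r2 by one
// page; `jae` repeats until the subtraction borrows.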
try lower.emit(.none, .@"test", &.{
.{ .mem = Memory.sib(.dword, .{
.base = .{ .reg = inst.data.rr.r1 },
.scale_index = .{ .scale = 1, .index = inst.data.rr.r2 },
.disp = -page_size,
}) },
.{ .reg = inst.data.rr.r1.to32() },
});
try lower.emit(.none, .sub, &.{
.{ .reg = inst.data.rr.r2 },
.{ .imm = Immediate.s(page_size) },
});
try lower.emit(.none, .jae, &.{
.{ .imm = lower.reloc(.{ .inst = index }) },
});
assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts);
},
.pseudo_push_reg_list => try lower.pushPopRegList(.push, inst),
.pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst),
@@ -440,6 +535,8 @@ fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Er
}});
}
const page_size: i32 = 1 << 12;
const abi = @import("abi.zig");
const assert = std.debug.assert;
const bits = @import("bits.zig");


@@ -740,6 +740,18 @@ pub const Inst = struct {
/// Uses `inst` payload.
pseudo_j_nz_or_p_inst,
/// Probe alignment
/// Uses `ri` payload.
pseudo_probe_align_ri_s,
/// Probe adjust unrolled
/// Uses `ri` payload.
pseudo_probe_adjust_unrolled_ri_s,
/// Probe adjust setup
/// Uses `rri` payload.
pseudo_probe_adjust_setup_rri_s,
/// Probe adjust loop
/// Uses `rr` payload.
pseudo_probe_adjust_loop_rr,
/// Push registers
/// Uses `reg_list` payload.
pseudo_push_reg_list,


@@ -120,7 +120,6 @@ test "memset with large array element, runtime known" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest;
const A = [128]u64;
var buf: [5]A = undefined;
@@ -139,7 +138,6 @@ test "memset with large array element, comptime known" {
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest;
const A = [128]u64;
var buf: [5]A = undefined;