diff --git a/src/codegen.cpp b/src/codegen.cpp
index a9542f96d9..200589cd2a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -7184,6 +7184,9 @@ static void do_code_gen(CodeGen *g) {
 
         if (!is_async) {
             // allocate async frames for noasync calls & awaits to async functions
+            ZigType *largest_call_frame_type = nullptr;
+            IrInstruction *all_calls_alloca = ir_create_alloca(g, &fn_table_entry->fndef_scope->base,
+                    fn_table_entry->body_node, fn_table_entry, g->builtin_types.entry_void, "@async_call_frame");
             for (size_t i = 0; i < fn_table_entry->call_list.length; i += 1) {
                 IrInstructionCallGen *call = fn_table_entry->call_list.at(i);
                 if (call->fn_entry == nullptr)
@@ -7195,8 +7198,15 @@ static void do_code_gen(CodeGen *g) {
                 if (call->frame_result_loc != nullptr)
                     continue;
                 ZigType *callee_frame_type = get_fn_frame_type(g, call->fn_entry);
-                call->frame_result_loc = ir_create_alloca(g, call->base.scope, call->base.source_node,
-                        fn_table_entry, callee_frame_type, "");
+                if (largest_call_frame_type == nullptr ||
+                    callee_frame_type->abi_size > largest_call_frame_type->abi_size)
+                {
+                    largest_call_frame_type = callee_frame_type;
+                }
+                call->frame_result_loc = all_calls_alloca;
+            }
+            if (largest_call_frame_type != nullptr) {
+                all_calls_alloca->value.type = get_pointer_to_type(g, largest_call_frame_type, false);
             }
             // allocate temporary stack data
             for (size_t alloca_i = 0; alloca_i < fn_table_entry->alloca_gen_list.length; alloca_i += 1) {
diff --git a/src/link.cpp b/src/link.cpp
index 1130481dce..b10220d5da 100644
--- a/src/link.cpp
+++ b/src/link.cpp
@@ -1615,6 +1615,11 @@ static void construct_linker_job_elf(LinkJob *lj) {
 
     lj->args.append("-error-limit=0");
 
+    if (g->out_type == OutTypeExe) {
+        lj->args.append("-z");
+        lj->args.append("stack-size=16777216"); // default to 16 MiB
+    }
+
     if (g->linker_script) {
         lj->args.append("-T");
         lj->args.append(g->linker_script);
diff --git a/std/debug.zig b/std/debug.zig
index 377e6e4845..9fd2c0ff53 100644
--- a/std/debug.zig
+++ b/std/debug.zig
@@ -1478,10 +1478,11 @@ const LineNumberProgram = struct {
     }
 };
 
+// TODO the noasyncs here are workarounds
 fn readStringRaw(allocator: *mem.Allocator, in_stream: var) ![]u8 {
     var buf = ArrayList(u8).init(allocator);
     while (true) {
-        const byte = try in_stream.readByte();
+        const byte = try noasync in_stream.readByte();
         if (byte == 0) break;
         try buf.append(byte);
     }
@@ -1494,10 +1495,11 @@ fn getString(di: *DwarfInfo, offset: u64) ![]u8 {
     return di.readString();
 }
 
+// TODO the noasyncs here are workarounds
 fn readAllocBytes(allocator: *mem.Allocator, in_stream: var, size: usize) ![]u8 {
     const buf = try allocator.alloc(u8, size);
     errdefer allocator.free(buf);
-    if ((try in_stream.read(buf)) < size) return error.EndOfFile;
+    if ((try noasync in_stream.read(buf)) < size) return error.EndOfFile;
     return buf;
 }
 
@@ -1506,8 +1508,9 @@ fn parseFormValueBlockLen(allocator: *mem.Allocator, in_stream: var, size: usize
     return FormValue{ .Block = buf };
 }
 
+// TODO the noasyncs here are workarounds
 fn parseFormValueBlock(allocator: *mem.Allocator, in_stream: var, size: usize) !FormValue {
-    const block_len = try in_stream.readVarInt(usize, builtin.Endian.Little, size);
+    const block_len = try noasync in_stream.readVarInt(usize, builtin.Endian.Little, size);
     return parseFormValueBlockLen(allocator, in_stream, block_len);
 }
 
@@ -1537,27 +1540,37 @@ fn parseFormValueConstant(allocator: *mem.Allocator, in_stream: var, signed: boo
     };
 }
 
+// TODO the noasyncs here are workarounds
 fn parseFormValueDwarfOffsetSize(in_stream: var, is_64: bool) !u64 {
-    return if (is_64) try in_stream.readIntLittle(u64) else u64(try in_stream.readIntLittle(u32));
+    return if (is_64) try noasync in_stream.readIntLittle(u64) else u64(try noasync in_stream.readIntLittle(u32));
 }
 
+// TODO the noasyncs here are workarounds
 fn parseFormValueTargetAddrSize(in_stream: var) !u64 {
-    return if (@sizeOf(usize) == 4) u64(try in_stream.readIntLittle(u32)) else if (@sizeOf(usize) == 8) try in_stream.readIntLittle(u64) else unreachable;
+    if (@sizeOf(usize) == 4) {
+        return u64(try noasync in_stream.readIntLittle(u32));
+    } else if (@sizeOf(usize) == 8) {
+        return noasync in_stream.readIntLittle(u64);
+    } else {
+        unreachable;
+    }
 }
 
+// TODO the noasyncs here are workarounds
 fn parseFormValueRef(allocator: *mem.Allocator, in_stream: var, size: i32) !FormValue {
     return FormValue{
         .Ref = switch (size) {
-            1 => try in_stream.readIntLittle(u8),
-            2 => try in_stream.readIntLittle(u16),
-            4 => try in_stream.readIntLittle(u32),
-            8 => try in_stream.readIntLittle(u64),
-            -1 => try leb.readULEB128(u64, in_stream),
+            1 => try noasync in_stream.readIntLittle(u8),
+            2 => try noasync in_stream.readIntLittle(u16),
+            4 => try noasync in_stream.readIntLittle(u32),
+            8 => try noasync in_stream.readIntLittle(u64),
+            -1 => try noasync leb.readULEB128(u64, in_stream),
             else => unreachable,
         },
     };
 }
 
+// TODO the noasyncs here are workarounds
 fn parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64: bool) anyerror!FormValue {
     return switch (form_id) {
         DW.FORM_addr => FormValue{ .Address = try parseFormValueTargetAddrSize(in_stream) },
@@ -1565,7 +1578,7 @@ fn parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64
         DW.FORM_block2 => parseFormValueBlock(allocator, in_stream, 2),
         DW.FORM_block4 => parseFormValueBlock(allocator, in_stream, 4),
         DW.FORM_block => x: {
-            const block_len = try leb.readULEB128(usize, in_stream);
+            const block_len = try noasync leb.readULEB128(usize, in_stream);
             return parseFormValueBlockLen(allocator, in_stream, block_len);
         },
         DW.FORM_data1 => parseFormValueConstant(allocator, in_stream, false, 1),
@@ -1577,11 +1590,11 @@ fn parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64
             return parseFormValueConstant(allocator, in_stream, signed, -1);
         },
         DW.FORM_exprloc => {
-            const size = try leb.readULEB128(usize, in_stream);
+            const size = try noasync leb.readULEB128(usize, in_stream);
             const buf = try readAllocBytes(allocator, in_stream, size);
             return FormValue{ .ExprLoc = buf };
         },
-        DW.FORM_flag => FormValue{ .Flag = (try in_stream.readByte()) != 0 },
+        DW.FORM_flag => FormValue{ .Flag = (try noasync in_stream.readByte()) != 0 },
         DW.FORM_flag_present => FormValue{ .Flag = true },
         DW.FORM_sec_offset => FormValue{ .SecOffset = try parseFormValueDwarfOffsetSize(in_stream, is_64) },
 
@@ -1592,12 +1605,12 @@ fn parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64
         DW.FORM_ref_udata => parseFormValueRef(allocator, in_stream, -1),
 
         DW.FORM_ref_addr => FormValue{ .RefAddr = try parseFormValueDwarfOffsetSize(in_stream, is_64) },
-        DW.FORM_ref_sig8 => FormValue{ .Ref = try in_stream.readIntLittle(u64) },
+        DW.FORM_ref_sig8 => FormValue{ .Ref = try noasync in_stream.readIntLittle(u64) },
 
         DW.FORM_string => FormValue{ .String = try readStringRaw(allocator, in_stream) },
         DW.FORM_strp => FormValue{ .StrPtr = try parseFormValueDwarfOffsetSize(in_stream, is_64) },
         DW.FORM_indirect => {
-            const child_form_id = try leb.readULEB128(u64, in_stream);
+            const child_form_id = try noasync leb.readULEB128(u64, in_stream);
             const F = @typeOf(async parseFormValue(allocator, in_stream, child_form_id, is_64));
             var frame = try allocator.create(F);
             defer allocator.destroy(frame);
@@ -2400,3 +2413,11 @@ stdcallcc fn handleSegfaultWindows(info: *windows.EXCEPTION_POINTERS) c_long {
         else => return windows.EXCEPTION_CONTINUE_SEARCH,
     }
 }
+
+/// Debugging aid: prints `prefix` followed by the current stack pointer value.
+/// Reads the `rsp` register directly, so it can only be used on x86_64 targets.
+pub fn dumpStackPointerAddr(prefix: []const u8) void {
+    const sp = asm ("" : [argc] "={rsp}" (-> usize));
+    std.debug.warn("{} sp = 0x{x}\n", prefix, sp);
+}
+
diff --git a/std/io/in_stream.zig b/std/io/in_stream.zig
index c617f10500..44c74fcca4 100644
--- a/std/io/in_stream.zig
+++ b/std/io/in_stream.zig
@@ -6,7 +6,7 @@ const assert = std.debug.assert;
 const mem = std.mem;
 const Buffer = std.Buffer;
 
-pub const default_stack_size = 4 * 1024 * 1024;
+pub const default_stack_size = 1 * 1024 * 1024;
 pub const stack_size: usize = if (@hasDecl(root, "stack_size_std_io_InStream"))
     root.stack_size_std_io_InStream
 else
diff --git a/std/os/linux/tls.zig b/std/os/linux/tls.zig
index 62df870944..8afc751401 100644
--- a/std/os/linux/tls.zig
+++ b/std/os/linux/tls.zig
@@ -125,7 +125,7 @@ pub fn setThreadPointer(addr: usize) void {
     }
 }
 
-pub fn initTLS() void {
+pub fn initTLS() ?*elf.Phdr {
     var tls_phdr: ?*elf.Phdr = null;
     var img_base: usize = 0;
 
@@ -152,10 +152,13 @@ pub fn initTLS() void {
     // Search the TLS section
     const phdrs = (@intToPtr([*]elf.Phdr, at_phdr))[0..at_phnum];
 
+    var gnu_stack: ?*elf.Phdr = null;
+
     for (phdrs) |*phdr| {
         switch (phdr.p_type) {
             elf.PT_PHDR => img_base = at_phdr - phdr.p_vaddr,
             elf.PT_TLS => tls_phdr = phdr,
+            elf.PT_GNU_STACK => gnu_stack = phdr,
             else => continue,
         }
     }
@@ -217,6 +220,8 @@ pub fn initTLS() void {
             .data_offset = data_offset,
         };
     }
+
+    return gnu_stack;
 }
 
 pub fn copyTLS(addr: usize) usize {
diff --git a/std/special/start.zig b/std/special/start.zig
index fde79a4baf..31639821b4 100644
--- a/std/special/start.zig
+++ b/std/special/start.zig
@@ -5,7 +5,7 @@ const std = @import("std");
 const builtin = @import("builtin");
 const assert = std.debug.assert;
 
-var argc_ptr: [*]usize = undefined;
+var starting_stack_ptr: [*]usize = undefined;
 
 const is_wasm = switch (builtin.arch) {
     .wasm32, .wasm64 => true,
@@ -35,17 +35,17 @@ nakedcc fn _start() noreturn {
 
     switch (builtin.arch) {
         .x86_64 => {
-            argc_ptr = asm (""
+            starting_stack_ptr = asm (""
                 : [argc] "={rsp}" (-> [*]usize)
             );
         },
         .i386 => {
-            argc_ptr = asm (""
+            starting_stack_ptr = asm (""
                 : [argc] "={esp}" (-> [*]usize)
             );
         },
         .aarch64, .aarch64_be, .arm => {
-            argc_ptr = asm ("mov %[argc], sp"
+            starting_stack_ptr = asm ("mov %[argc], sp"
                 : [argc] "=r" (-> [*]usize)
             );
         },
@@ -72,8 +72,8 @@ fn posixCallMainAndExit() noreturn {
     if (builtin.os == builtin.Os.freebsd) {
         @setAlignStack(16);
     }
-    const argc = argc_ptr[0];
-    const argv = @ptrCast([*][*]u8, argc_ptr + 1);
+    const argc = starting_stack_ptr[0];
+    const argv = @ptrCast([*][*]u8, starting_stack_ptr + 1);
 
     const envp_optional = @ptrCast([*]?[*]u8, argv + argc + 1);
     var envp_count: usize = 0;
@@ -85,21 +85,42 @@ fn posixCallMainAndExit() noreturn {
         const auxv = @ptrCast([*]std.elf.Auxv, envp.ptr + envp_count + 1);
         std.os.linux.elf_aux_maybe = auxv;
         // Initialize the TLS area
-        std.os.linux.tls.initTLS();
+        // initTLS() also returns the PT_GNU_STACK program header, or null when there is none.
+        // Only the disabled stack allocation below needs it, so null must not abort startup.
+        const gnu_stack_phdr = std.os.linux.tls.initTLS();
 
         if (std.os.linux.tls.tls_image) |tls_img| {
             const tls_addr = std.os.linux.tls.allocateTLS(tls_img.alloc_size);
             const tp = std.os.linux.tls.copyTLS(tls_addr);
             std.os.linux.tls.setThreadPointer(tp);
         }
+
+        // TODO This is disabled because what should we do when linking libc and this code
+        // does not execute? And also it's causing a test failure in stack traces in release modes.
+
+        //// Linux ignores the stack size from the ELF file, and instead always does 8 MiB. A further
+        //// problem is that it uses PROT_GROWSDOWN which prevents stores to addresses too far down
+        //// the stack and requires "probing". So here we allocate our own stack.
+        //const wanted_stack_size = (gnu_stack_phdr orelse @panic("ELF missing stack size")).p_memsz;
+        //assert(wanted_stack_size % std.mem.page_size == 0);
+        //// Allocate an extra page as the guard page.
+        //const total_size = wanted_stack_size + std.mem.page_size;
+        //const new_stack = std.os.mmap(
+        //    null,
+        //    total_size,
+        //    std.os.PROT_READ | std.os.PROT_WRITE,
+        //    std.os.MAP_PRIVATE | std.os.MAP_ANONYMOUS,
+        //    -1,
+        //    0,
+        //) catch @panic("out of memory");
+        //std.os.mprotect(new_stack[0..std.mem.page_size], std.os.PROT_NONE) catch {};
+        //std.os.exit(@newStackCall(new_stack, callMainWithArgs, argc, argv, envp));
     }
 
-    std.os.exit(callMainWithArgs(argc, argv, envp));
+    std.os.exit(@inlineCall(callMainWithArgs, argc, argv, envp));
 }
 
-// This is marked inline because for some reason LLVM in release mode fails to inline it,
-// and we want fewer call frames in stack traces.
-inline fn callMainWithArgs(argc: usize, argv: [*][*]u8, envp: [][*]u8) u8 {
+fn callMainWithArgs(argc: usize, argv: [*][*]u8, envp: [][*]u8) u8 {
     std.os.argv = argv[0..argc];
     std.os.environ = envp;
 
@@ -112,7 +133,7 @@ extern fn main(c_argc: i32, c_argv: [*][*]u8, c_envp: [*]?[*]u8) i32 {
     var env_count: usize = 0;
     while (c_envp[env_count] != null) : (env_count += 1) {}
     const envp = @ptrCast([*][*]u8, c_envp)[0..env_count];
-    return callMainWithArgs(@intCast(usize, c_argc), c_argv, envp);
+    return @inlineCall(callMainWithArgs, @intCast(usize, c_argc), c_argv, envp);
 }
 
 // General error message for a malformed return type
diff --git a/std/thread.zig b/std/thread.zig
index abf2f1cae1..278fcc827c 100644
--- a/std/thread.zig
+++ b/std/thread.zig
@@ -145,7 +145,7 @@ pub const Thread = struct {
         if (builtin.single_threaded) @compileError("cannot spawn thread when building in single-threaded mode");
         // TODO compile-time call graph analysis to determine stack upper bound
         // https://github.com/ziglang/zig/issues/157
-        const default_stack_size = 8 * 1024 * 1024;
+        const default_stack_size = 16 * 1024 * 1024;
 
         const Context = @typeOf(context);
         comptime assert(@ArgType(@typeOf(startFn), 0) == Context);