diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4c5b3f3121..9c22ee7f12 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -623,11 +623,15 @@ pub const StackIterator = struct { const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc); switch (native_os) { .macos, .ios, .watchos, .tvos => { - const o_file_info = try module.getOFileInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc); - if (o_file_info.unwind_info == null) return error.MissingUnwindInfo; - - // TODO: Unwind using __unwind_info, - unreachable; + // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding + // via DWARF before attempting to use the compact unwind info will produce incorrect results. + if (module.unwind_info) |unwind_info| { + if (macho.unwindFrame(&self.dwarf_context, unwind_info, module.base_address)) |return_address| { + return return_address; + } else |err| { + if (err != error.RequiresDWARFUnwind) return err; + } + } else return error.MissingUnwindInfo; }, else => {}, } @@ -1236,7 +1240,16 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn .ncmds = hdr.ncmds, .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], }; + var unwind_info: ?[]const u8 = null; const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + for (cmd.getSections()) |sect| { + if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) { + unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size); + break; + } + } + }, .SYMTAB => break cmd.cast(macho.symtab_command).?, else => {}, } else return error.MissingDebugInfo; @@ -1346,6 +1359,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn .ofiles = ModuleDebugInfo.OFileTable.init(allocator), .symbols = symbols, .strings = strings, + .unwind_info = unwind_info, }; } @@ -1886,12 +1900,13 @@ pub const ModuleDebugInfo = switch 
(native_os) { symbols: []const MachoSymbol, strings: [:0]const u8, ofiles: OFileTable, + // Backed by mapped_memory + unwind_info: ?[]const u8, const OFileTable = std.StringHashMap(OFileInfo); const OFileInfo = struct { di: DW.DwarfInfo, addr_table: std.StringHashMap(u64), - unwind_info: ?[]const u8, }; fn deinit(self: *@This(), allocator: mem.Allocator) void { @@ -1949,24 +1964,21 @@ pub const ModuleDebugInfo = switch (native_os) { addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); } - var unwind_info: ?[]const u8 = null; var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array; for (segcmd.?.getSections()) |sect| { - if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) { - unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size); - } else if (std.mem.eql(u8, "__DWARF", sect.segName())) { - var section_index: ?usize = null; - inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; - } - if (section_index == null) continue; + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); - sections[section_index.?] = .{ - .data = section_bytes, - .owned = false, - }; + var section_index: ?usize = null; + inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; } + if (section_index == null) continue; + + const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index.?] 
= .{ + .data = section_bytes, + .owned = false, + }; } const missing_debug_info = @@ -1986,7 +1998,6 @@ pub const ModuleDebugInfo = switch (native_os) { var info = OFileInfo{ .di = di, .addr_table = addr_table, - .unwind_info = unwind_info, }; // Add the debug info to the cache diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig index b284ac4443..e51b883a99 100644 --- a/lib/std/dwarf.zig +++ b/lib/std/dwarf.zig @@ -1641,7 +1641,6 @@ pub const DwarfInfo = struct { // instead of the actual base address of the module. When using .eh_frame_hdr, PC can be used directly // as pointers will be decoded relative to the alreayd-mapped .eh_frame. var mapped_pc: usize = undefined; - if (di.eh_frame_hdr) |header| { const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; mapped_pc = context.pc; @@ -1657,16 +1656,12 @@ pub const DwarfInfo = struct { mapped_pc = context.pc - module_base_address; const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct { pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { - if (pc < mid_item.pc_begin) { - return .lt; - } else { - const range_end = mid_item.pc_begin + mid_item.pc_range; - if (pc < range_end) { - return .eq; - } + if (pc < mid_item.pc_begin) return .lt; - return .gt; - } + const range_end = mid_item.pc_begin + mid_item.pc_range; + if (pc < range_end) return .eq; + + return .gt; } }.compareFn); @@ -2000,6 +1995,7 @@ pub const ExceptionFrameHeader = struct { } } + if (len == 0) return badDwarf(); try stream.seekTo(left * entry_size); // Read past the pc_begin field of the entry diff --git a/lib/std/dwarf/abi.zig b/lib/std/dwarf/abi.zig index f8a434dd7e..0857732e9b 100644 --- a/lib/std/dwarf/abi.zig +++ b/lib/std/dwarf/abi.zig @@ -45,15 +45,6 @@ pub fn spRegNum(reg_context: RegisterContext) u8 { }; } -fn RegBytesReturnType(comptime ContextPtrType: type) type { - const info = @typeInfo(ContextPtrType); - if (info != .Pointer or 
info.Pointer.child != std.debug.ThreadContext) {
-        @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(@TypeOf(ContextPtrType)));
-    }
-
-    return if (info.Pointer.is_const) return []const u8 else []u8;
-}
-
 pub const RegisterContext = struct {
     eh_frame: bool,
     is_macho: bool,
@@ -63,9 +54,54 @@ pub const AbiError = error{
     InvalidRegister,
     UnimplementedArch,
     UnimplementedOs,
+    RegisterContextRequired,
     ThreadContextNotSupported,
 };
 
+/// Builds the pointer type returned by `regValueNative`: a single-item pointer to `T` that copies
+/// its constness and other pointer attributes from `RegBytesReturnType(ContextPtrType)`.
+fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type {
+    const reg_bytes_type = comptime RegBytesReturnType(ContextPtrType);
+    const info = @typeInfo(reg_bytes_type).Pointer;
+    return @Type(.{
+        .Pointer = .{
+            .size = .One,
+            .is_const = info.is_const,
+            .is_volatile = info.is_volatile,
+            .is_allowzero = info.is_allowzero,
+            .alignment = info.alignment,
+            .address_space = info.address_space,
+            .child = T,
+            .sentinel = null,
+        },
+    });
+}
+
+/// Returns a pointer to the backing storage of `reg_number`, reinterpreted as a `T`.
+/// Errors from `regBytes` are propagated; `error.IncompatibleRegisterSize` is returned when the
+/// register's storage is not exactly `@sizeOf(T)` bytes long.
+pub fn regValueNative(
+    comptime T: type,
+    thread_context_ptr: anytype,
+    reg_number: u8,
+    reg_context: ?RegisterContext,
+) !RegValueReturnType(@TypeOf(thread_context_ptr), T) {
+    const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context);
+    if (@sizeOf(T) != reg_bytes.len) return error.IncompatibleRegisterSize;
+    return mem.bytesAsValue(T, reg_bytes[0..@sizeOf(T)]);
+}
+
+/// Selects the register byte slice type for a pointer to `std.debug.ThreadContext`:
+/// `[]const u8` for a const pointer, `[]u8` otherwise. Any other type is a compile error.
+fn RegBytesReturnType(comptime ContextPtrType: type) type {
+    const info = @typeInfo(ContextPtrType);
+    if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) {
+        // Name the offending type itself; `@TypeOf(ContextPtrType)` is always `type`.
+        @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(ContextPtrType));
+    }
+
+    return if (info.Pointer.is_const) []const u8 else []u8;
+}
+
 /// Returns a slice containing the backing storage for `reg_number`.
///
 /// `reg_context` describes in what context the register number is used, as it can have different
diff --git a/lib/std/macho.zig b/lib/std/macho.zig
index 1b886e2d90..03cb02e3e8 100644
--- a/lib/std/macho.zig
+++ b/lib/std/macho.zig
@@ -2064,3 +2064,337 @@ pub const UNWIND_ARM64_FRAME_D14_D15_PAIR: u32 = 0x00000800;
 
 pub const UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK: u32 = 0x00FFF000;
 pub const UNWIND_ARM64_DWARF_SECTION_OFFSET: u32 = 0x00FFFFFF;
+
+/// Bit-level layout of a 32-bit compact unwind encoding, as bit-cast from `compact_unwind_encoding_t`.
+/// `value` and `mode` are packed unions with one member per architecture (`x86_64`, `arm64`).
+pub const CompactUnwindEncoding = packed struct(u32) {
+    value: packed union {
+        x86_64: packed union {
+            frame: packed struct(u24) {
+                reg4: u3,
+                reg3: u3,
+                reg2: u3,
+                reg1: u3,
+                reg0: u3,
+                unused: u1 = 0,
+                frame_offset: u8,
+            },
+            frameless: packed struct(u24) {
+                stack_reg_permutation: u10,
+                stack_reg_count: u3,
+                stack_adjust: u3,
+                stack_size: u8,
+            },
+            dwarf: u24,
+        },
+        arm64: packed union {
+            frame: packed struct(u24) {
+                x_reg_pairs: packed struct {
+                    x19_x20: u1,
+                    x21_x22: u1,
+                    x23_x24: u1,
+                    x25_x26: u1,
+                    x27_x28: u1,
+                },
+                d_reg_pairs: packed struct {
+                    d8_d9: u1,
+                    d10_d11: u1,
+                    d12_d13: u1,
+                    d14_d15: u1,
+                },
+                unused: u15,
+            },
+            frameless: packed struct(u24) {
+                unused: u12 = 0,
+                stack_size: u12,
+            },
+            dwarf: u24,
+        },
+    },
+    mode: packed union {
+        x86_64: UNWIND_X86_64_MODE,
+        arm64: UNWIND_ARM64_MODE,
+    },
+    personality_index: u2,
+    has_lsda: u1,
+    start: u1,
+};
+
+/// Returns the DWARF register number for an x86_64 register number found in compact unwind info
+fn dwarfRegNumber(unwind_reg_number: u3) !u8 {
+    return switch (unwind_reg_number) {
+        1 => 3, // RBX
+        2 => 12, // R12
+        3 => 13, // R13
+        4 => 14, // R14
+        5 => 15, // R15
+        6 => 6, // RBP
+        else => error.InvalidUnwindRegisterNumber,
+    };
+}
+
+const dwarf = std.dwarf;
+const abi = dwarf.abi;
+
+/// Unwinds one frame using the `__unwind_info` section contents in `unwind_info`.
+///
+/// Looks up the compact unwind encoding covering `context.pc - module_base_address`, applies it to
+/// the registers in `context.thread_context`, sets `context.pc` to the new instruction pointer minus
+/// one (when non-zero) and returns the new instruction pointer read from the stack.
+///
+/// Returns `error.RequiresDWARFUnwind` when the encoding's mode is `.DWARF`, and
+/// `error.UnimplementedUnwindEncoding` for modes that are not handled here.
+pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, module_base_address: usize) !usize {
+    const header = mem.bytesAsValue(
+        unwind_info_section_header,
+        unwind_info[0..@sizeOf(unwind_info_section_header)],
+    );
+    const indices = mem.bytesAsSlice(
+        unwind_info_section_header_index_entry,
+        unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(unwind_info_section_header_index_entry)],
+    );
+    if (indices.len == 0) return error.MissingUnwindInfo;
+
+    const mapped_pc = context.pc - module_base_address;
+    const second_level_index = blk: {
+        var left: usize = 0;
+        var len: usize = indices.len;
+
+        while (len > 1) {
+            const mid = left + len / 2;
+            const offset = indices[mid].functionOffset;
+            if (mapped_pc < offset) {
+                len /= 2;
+            } else {
+                left = mid;
+                if (mapped_pc == offset) break;
+                len -= len / 2;
+            }
+        }
+
+        // Last index is a sentinel containing the highest address as its functionOffset
+        if (len == 0 or indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo;
+        break :blk &indices[left];
+    };
+
+    const common_encodings = mem.bytesAsSlice(
+        compact_unwind_encoding_t,
+        unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(compact_unwind_encoding_t)],
+    );
+
+    const start_offset = second_level_index.secondLevelPagesSectionOffset;
+    const kind = mem.bytesAsValue(
+        UNWIND_SECOND_LEVEL,
+        unwind_info[start_offset..][0..@sizeOf(UNWIND_SECOND_LEVEL)],
+    );
+    const raw_encoding = switch (kind.*) {
+        .REGULAR => blk: {
+            const page_header = mem.bytesAsValue(
+                unwind_info_regular_second_level_page_header,
+                unwind_info[start_offset..][0..@sizeOf(unwind_info_regular_second_level_page_header)],
+            );
+
+            const entries = mem.bytesAsSlice(
+                unwind_info_regular_second_level_entry,
+                unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(unwind_info_regular_second_level_entry)],
+            );
+            if (entries.len == 0) return error.InvalidUnwindInfo;
+
+            var left: usize = 0;
+            var len: usize = entries.len;
+            while (len > 1) {
+                const mid = left + len / 2;
+                const offset = entries[mid].functionOffset;
+                if (mapped_pc < offset) {
+                    len /= 2;
+                } else {
+                    left = mid;
+                    if (mapped_pc == offset) break;
+                    len -= len / 2;
+                }
+            }
+
+            if (len == 0) return error.InvalidUnwindInfo;
+            break :blk entries[left].encoding;
+        },
+        .COMPRESSED => blk: {
+            const page_header = mem.bytesAsValue(
+                unwind_info_compressed_second_level_page_header,
+                unwind_info[start_offset..][0..@sizeOf(unwind_info_compressed_second_level_page_header)],
+            );
+
+            const entries = mem.bytesAsSlice(
+                UnwindInfoCompressedEntry,
+                unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(UnwindInfoCompressedEntry)],
+            );
+            if (entries.len == 0) return error.InvalidUnwindInfo;
+
+            var left: usize = 0;
+            var len: usize = entries.len;
+            while (len > 1) {
+                const mid = left + len / 2;
+                const offset = second_level_index.functionOffset + entries[mid].funcOffset;
+                if (mapped_pc < offset) {
+                    len /= 2;
+                } else {
+                    left = mid;
+                    if (mapped_pc == offset) break;
+                    len -= len / 2;
+                }
+            }
+
+            if (len == 0) return error.InvalidUnwindInfo;
+            const entry = entries[left];
+            if (entry.encodingIndex < header.commonEncodingsArrayCount) {
+                if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo;
+                break :blk common_encodings[entry.encodingIndex];
+            } else {
+                const local_index = try std.math.sub(
+                    u8,
+                    entry.encodingIndex,
+                    std.math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo,
+                );
+                const local_encodings = mem.bytesAsSlice(
+                    compact_unwind_encoding_t,
+                    unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(compact_unwind_encoding_t)],
+                );
+                if (local_index >= local_encodings.len) return error.InvalidUnwindInfo;
+                break :blk local_encodings[local_index];
+            }
+        },
+        else => return error.InvalidUnwindInfo,
+    };
+
+    if (raw_encoding == 0) return error.NoUnwindInfo;
+    const reg_context = dwarf.abi.RegisterContext{
+        .eh_frame = false,
+        .is_macho = true,
+    };
+
+    const encoding: CompactUnwindEncoding = @bitCast(raw_encoding);
+    const new_ip = switch (builtin.cpu.arch) {
+        .x86_64 => switch (encoding.mode.x86_64) {
+            .OLD => return error.UnimplementedUnwindEncoding,
+            .RBP_FRAME => blk: {
+                const regs: [5]u3 = .{
+                    encoding.value.x86_64.frame.reg0,
+                    encoding.value.x86_64.frame.reg1,
+                    encoding.value.x86_64.frame.reg2,
+                    encoding.value.x86_64.frame.reg3,
+                    encoding.value.x86_64.frame.reg4,
+                };
+
+                // Widen before scaling: the field is a u8 and the byte offset may not fit in one.
+                const frame_offset = @as(usize, encoding.value.x86_64.frame.frame_offset) * @sizeOf(usize);
+                var max_reg: usize = 0;
+                inline for (regs, 0..) |reg, i| {
+                    if (reg > 0) max_reg = i;
+                }
+
+                const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*;
+                const new_sp = fp + 2 * @sizeOf(usize);
+
+                // Verify the stack range we're about to read register values from is valid
+                if (!context.isValidMemory(new_sp) or !context.isValidMemory(fp - frame_offset + max_reg * @sizeOf(usize))) return error.InvalidUnwindInfo;
+
+                const ip_ptr = fp + @sizeOf(usize);
+                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
+                const new_fp = @as(*const usize, @ptrFromInt(fp)).*;
+
+                (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp;
+                (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
+                (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;
+
+                for (regs, 0..) |reg, i| {
+                    if (reg == 0) continue;
+                    const addr = fp - frame_offset + i * @sizeOf(usize);
+                    const reg_number = try dwarfRegNumber(reg);
+                    (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*;
+                }
+
+                break :blk new_ip;
+            },
+            .STACK_IMMD => blk: {
+                const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*;
+
+                // Decode Lehmer-coded sequence of registers.
+                // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h
+
+                // Decode the variable-based permutation number into its digits. Each digit represents
+                // an index into the list of register numbers that weren't yet used in the sequence at
+                // the time the digit was added.
+                const reg_count = encoding.value.x86_64.frameless.stack_reg_count;
+                // `stack_reg_count` is a u3, but `digits` below only has room for six registers.
+                if (reg_count > 6) return error.InvalidUnwindInfo;
+
+                // Index of the first saved-register slot above `sp`; the slot read as the new ip follows
+                // the registers. Reject sizes too small to hold them instead of underflowing in u8.
+                const first_slot = std.math.sub(usize, encoding.value.x86_64.frameless.stack_size, @as(usize, reg_count) + 1) catch return error.InvalidUnwindInfo;
+
+                const ip_ptr = if (reg_count > 0) reg_blk: {
+                    var digits: [6]u3 = undefined;
+                    var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation;
+                    // The k-th register of the sequence is picked from the `6 - k` registers still unused,
+                    // so its digit has radix `6 - k`. Digits are extracted last register first, which makes
+                    // the first radix `7 - reg_count`; that only equals 2 when five registers are saved.
+                    var base: usize = 7 - @as(usize, reg_count);
+                    for (0..reg_count) |i| {
+                        const div = accumulator / base;
+                        digits[digits.len - 1 - i] = @intCast(accumulator - base * div);
+                        accumulator = div;
+                        base += 1;
+                    }
+
+                    const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 };
+                    var registers: [reg_numbers.len]u3 = undefined;
+                    var used_indices = [_]bool{false} ** reg_numbers.len;
+                    for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| {
+                        var unused_count: u8 = 0;
+                        const unused_index = for (used_indices, 0..) |used, index| {
+                            if (!used) {
+                                if (target_unused_index == unused_count) break index;
+                                unused_count += 1;
+                            }
+                        } else unreachable;
+
+                        registers[i] = reg_numbers[unused_index];
+                        used_indices[unused_index] = true;
+                    }
+
+                    var reg_addr = sp + first_slot * @sizeOf(usize);
+                    if (!context.isValidMemory(reg_addr)) return error.InvalidUnwindInfo;
+                    for (0..reg_count) |i| {
+                        const reg_number = try dwarfRegNumber(registers[i]);
+                        (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
+                        reg_addr += @sizeOf(usize);
+                    }
+
+                    break :reg_blk reg_addr;
+                } else sp + first_slot * @sizeOf(usize);
+
+                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
+                const new_sp = ip_ptr + @sizeOf(usize);
+                if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo;
+
+                (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
+                (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;
+
+                break :blk new_ip;
+            },
+            .STACK_IND => {
+                return error.UnimplementedUnwindEncoding; // TODO
+            },
+            .DWARF => return error.RequiresDWARFUnwind,
+        },
+        // `mode` is an untagged packed union: read its arm64 member when targeting arm64.
+        .aarch64 => switch (encoding.mode.arm64) {
+            .DWARF => return error.RequiresDWARFUnwind,
+            else => return error.UnimplementedUnwindEncoding,
+        },
+        else => return error.UnimplementedArch,
+    };
+
+    context.pc = new_ip;
+    if (context.pc > 0) context.pc -= 1;
+    return new_ip;
+}
diff --git a/test/standalone/dwarf_unwinding/build.zig b/test/standalone/dwarf_unwinding/build.zig
index c59effda9f..885207f068 100644
--- a/test/standalone/dwarf_unwinding/build.zig
+++ b/test/standalone/dwarf_unwinding/build.zig
@@ -16,6 +16,7 @@ pub fn build(b: *std.Build) void {
             .optimize = optimize,
         });
 
+        if (target.isDarwin()) exe.unwind_tables = true;
         exe.omit_frame_pointer = true;
 
         const
run_cmd = b.addRunArtifact(exe); @@ -43,6 +44,7 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + if (target.isDarwin()) exe.unwind_tables = true; exe.omit_frame_pointer = true; exe.linkLibrary(c_shared_lib); diff --git a/test/standalone/dwarf_unwinding/zig_unwind.zig b/test/standalone/dwarf_unwinding/zig_unwind.zig index 3b13de24a1..d82bdaa7db 100644 --- a/test/standalone/dwarf_unwinding/zig_unwind.zig +++ b/test/standalone/dwarf_unwinding/zig_unwind.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const debug = std.debug; const testing = std.testing; @@ -18,6 +19,24 @@ noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void { } noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void { + if (builtin.os.tag == .macos) { + // Exercise different __unwind_info encodings by forcing some registers to be restored + switch (builtin.cpu.arch) { + .x86_64 => { + asm volatile ( + \\movq $3, %%rbx + \\movq $12, %%r12 + \\movq $13, %%r13 + \\movq $14, %%r14 + \\movq $15, %%r15 + \\movq $6, %%rbp + ::: "rbx", "r12", "r13", "r14", "r15", "rbp"); + }, + .aarch64 => {}, + else => {}, + } + } + expected[1] = @returnAddress(); frame3(expected, unwound); }