From e1451f92f8e49f844c528386c4463c9b6fc9a0f3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Dec 2020 20:55:11 +0100 Subject: [PATCH 01/17] macho: move findFreeSpace into SegmentCommand One exception will be treatment of the __LINKEDIT segment which will be handled separately inside MachO directly since it doesn't include any sections. --- src/link/MachO.zig | 237 ++++++++++++++++-------------------- src/link/MachO/commands.zig | 35 +++++- 2 files changed, 142 insertions(+), 130 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 17816959a3..04702807b7 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -155,8 +155,8 @@ pub const PieFixup = struct { }; /// `alloc_num / alloc_den` is the factor of padding when allocating. -const alloc_num = 4; -const alloc_den = 3; +pub const alloc_num = 4; +pub const alloc_den = 3; /// Default path to dyld /// TODO instead of hardcoding it, we should probably look through some env vars and search paths @@ -1358,7 +1358,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { }; const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; const needed_size = self.base.options.program_code_size_hint; - const off = self.findFreeSpace(text_segment, needed_size, @as(u16, 1) << alignment); + const off = text_segment.findFreeSpace(needed_size, @as(u16, 1) << alignment, self.header_pad); log.debug("found __text section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); @@ -1386,7 +1386,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = self.findFreeSpace(text_segment, needed_size, @alignOf(u64)); + const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // 
TODO Must expand __TEXT segment. log.debug("found __ziggot section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); @@ -1437,11 +1437,10 @@ pub fn populateMissingMetadata(self: *MachO) !void { } if (self.dyld_info_cmd_index == null) { self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; // TODO Preallocate rebase, binding, and lazy binding info. const export_size = 2; - const export_off = self.findFreeSpace(&linkedit_segment, export_size, 1); + const export_off = self.findFreeSpaceLinkedit(export_size, 1); log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); @@ -1466,16 +1465,15 @@ pub fn populateMissingMetadata(self: *MachO) !void { } if (self.symtab_cmd_index == null) { self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab_size = self.base.options.symbol_count_hint * @sizeOf(macho.nlist_64); - const symtab_off = self.findFreeSpace(&linkedit_segment, symtab_size, @sizeOf(macho.nlist_64)); + const symtab_off = self.findFreeSpaceLinkedit(symtab_size, @sizeOf(macho.nlist_64)); log.debug("found symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); try self.string_table.append(self.base.allocator, 0); // Need a null at position 0. 
const strtab_size = self.string_table.items.len; - const strtab_off = self.findFreeSpace(&linkedit_segment, strtab_size, 1); + const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1); log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size }); @@ -1613,7 +1611,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { } if (self.code_signature_cmd_index == null) { self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; try self.load_commands.append(self.base.allocator, .{ .LinkeditData = .{ .cmd = macho.LC_CODE_SIGNATURE, @@ -1790,48 +1787,41 @@ fn nextSegmentAddressAndOffset(self: *MachO) NextSegmentAddressAndOffset { }; } -fn allocatedSize(self: *MachO, segment: *const SegmentCommand, start: u64) u64 { +fn allocatedSizeLinkedit(self: *MachO, start: u64) u64 { assert(start > 0); var min_pos: u64 = std.math.maxInt(u64); - if (parseAndCmpName(&segment.inner.segname, "__LINKEDIT")) { - assert(segment.sections.items.len == 0); - // __LINKEDIT is a weird segment where sections get their own load commands so we - // special-case it. - if (self.dyld_info_cmd_index) |idx| { - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - if (dyld_info.rebase_off > start and dyld_info.rebase_off < min_pos) min_pos = dyld_info.rebase_off; - if (dyld_info.bind_off > start and dyld_info.bind_off < min_pos) min_pos = dyld_info.bind_off; - if (dyld_info.weak_bind_off > start and dyld_info.weak_bind_off < min_pos) min_pos = dyld_info.weak_bind_off; - if (dyld_info.lazy_bind_off > start and dyld_info.lazy_bind_off < min_pos) min_pos = dyld_info.lazy_bind_off; - if (dyld_info.export_off > start and dyld_info.export_off < min_pos) min_pos = dyld_info.export_off; - } + // __LINKEDIT is a weird segment where sections get their own load commands so we + // special-case it. 
+ if (self.dyld_info_cmd_index) |idx| { + const dyld_info = self.load_commands.items[idx].DyldInfoOnly; + if (dyld_info.rebase_off > start and dyld_info.rebase_off < min_pos) min_pos = dyld_info.rebase_off; + if (dyld_info.bind_off > start and dyld_info.bind_off < min_pos) min_pos = dyld_info.bind_off; + if (dyld_info.weak_bind_off > start and dyld_info.weak_bind_off < min_pos) min_pos = dyld_info.weak_bind_off; + if (dyld_info.lazy_bind_off > start and dyld_info.lazy_bind_off < min_pos) min_pos = dyld_info.lazy_bind_off; + if (dyld_info.export_off > start and dyld_info.export_off < min_pos) min_pos = dyld_info.export_off; + } - if (self.function_starts_cmd_index) |idx| { - const fstart = self.load_commands.items[idx].LinkeditData; - if (fstart.dataoff > start and fstart.dataoff < min_pos) min_pos = fstart.dataoff; - } + if (self.function_starts_cmd_index) |idx| { + const fstart = self.load_commands.items[idx].LinkeditData; + if (fstart.dataoff > start and fstart.dataoff < min_pos) min_pos = fstart.dataoff; + } - if (self.data_in_code_cmd_index) |idx| { - const dic = self.load_commands.items[idx].LinkeditData; - if (dic.dataoff > start and dic.dataoff < min_pos) min_pos = dic.dataoff; - } + if (self.data_in_code_cmd_index) |idx| { + const dic = self.load_commands.items[idx].LinkeditData; + if (dic.dataoff > start and dic.dataoff < min_pos) min_pos = dic.dataoff; + } - if (self.dysymtab_cmd_index) |idx| { - const dysymtab = self.load_commands.items[idx].Dysymtab; - if (dysymtab.indirectsymoff > start and dysymtab.indirectsymoff < min_pos) min_pos = dysymtab.indirectsymoff; - // TODO Handle more dynamic symbol table sections. - } + if (self.dysymtab_cmd_index) |idx| { + const dysymtab = self.load_commands.items[idx].Dysymtab; + if (dysymtab.indirectsymoff > start and dysymtab.indirectsymoff < min_pos) min_pos = dysymtab.indirectsymoff; + // TODO Handle more dynamic symbol table sections. 
+ } - if (self.symtab_cmd_index) |idx| { - const symtab = self.load_commands.items[idx].Symtab; - if (symtab.symoff > start and symtab.symoff < min_pos) min_pos = symtab.symoff; - if (symtab.stroff > start and symtab.stroff < min_pos) min_pos = symtab.stroff; - } - } else { - for (segment.sections.items) |section| { - if (section.offset > start and section.offset < min_pos) min_pos = section.offset; - } + if (self.symtab_cmd_index) |idx| { + const symtab = self.load_commands.items[idx].Symtab; + if (symtab.symoff > start and symtab.symoff < min_pos) min_pos = symtab.symoff; + if (symtab.stroff > start and symtab.stroff < min_pos) min_pos = symtab.stroff; } return min_pos - start; @@ -1846,101 +1836,90 @@ inline fn checkForCollision(start: u64, end: u64, off: u64, size: u64) ?u64 { return null; } -fn detectAllocCollision(self: *MachO, segment: *const SegmentCommand, start: u64, size: u64) ?u64 { +fn detectAllocCollisionLinkedit(self: *MachO, start: u64, size: u64) ?u64 { const end = start + satMul(size, alloc_num) / alloc_den; - if (parseAndCmpName(&segment.inner.segname, "__LINKEDIT")) { - assert(segment.sections.items.len == 0); - // __LINKEDIT is a weird segment where sections get their own load commands so we - // special-case it. 
- if (self.dyld_info_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - if (checkForCollision(start, end, dyld_info.rebase_off, dyld_info.rebase_size)) |pos| { - return pos; - } - // Binding info - if (checkForCollision(start, end, dyld_info.bind_off, dyld_info.bind_size)) |pos| { - return pos; - } - // Weak binding info - if (checkForCollision(start, end, dyld_info.weak_bind_off, dyld_info.weak_bind_size)) |pos| { - return pos; - } - // Lazy binding info - if (checkForCollision(start, end, dyld_info.lazy_bind_off, dyld_info.lazy_bind_size)) |pos| { - return pos; - } - // Export info - if (checkForCollision(start, end, dyld_info.export_off, dyld_info.export_size)) |pos| { - return pos; - } + // __LINKEDIT is a weird segment where sections get their own load commands so we + // special-case it. + if (self.dyld_info_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const dyld_info = self.load_commands.items[idx].DyldInfoOnly; + if (checkForCollision(start, end, dyld_info.rebase_off, dyld_info.rebase_size)) |pos| { + return pos; } + // Binding info + if (checkForCollision(start, end, dyld_info.bind_off, dyld_info.bind_size)) |pos| { + return pos; + } + // Weak binding info + if (checkForCollision(start, end, dyld_info.weak_bind_off, dyld_info.weak_bind_size)) |pos| { + return pos; + } + // Lazy binding info + if (checkForCollision(start, end, dyld_info.lazy_bind_off, dyld_info.lazy_bind_size)) |pos| { + return pos; + } + // Export info + if (checkForCollision(start, end, dyld_info.export_off, dyld_info.export_size)) |pos| { + return pos; + } + } - if (self.function_starts_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const fstart = self.load_commands.items[idx].LinkeditData; - if (checkForCollision(start, end, fstart.dataoff, fstart.datasize)) |pos| { - return pos; - } + if 
(self.function_starts_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const fstart = self.load_commands.items[idx].LinkeditData; + if (checkForCollision(start, end, fstart.dataoff, fstart.datasize)) |pos| { + return pos; } + } - if (self.data_in_code_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dic = self.load_commands.items[idx].LinkeditData; - if (checkForCollision(start, end, dic.dataoff, dic.datasize)) |pos| { - return pos; - } + if (self.data_in_code_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const dic = self.load_commands.items[idx].LinkeditData; + if (checkForCollision(start, end, dic.dataoff, dic.datasize)) |pos| { + return pos; } + } - if (self.dysymtab_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dysymtab = self.load_commands.items[idx].Dysymtab; - // Indirect symbol table - const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); - if (checkForCollision(start, end, dysymtab.indirectsymoff, nindirectsize)) |pos| { - return pos; - } - // TODO Handle more dynamic symbol table sections. + if (self.dysymtab_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const dysymtab = self.load_commands.items[idx].Dysymtab; + // Indirect symbol table + const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); + if (checkForCollision(start, end, dysymtab.indirectsymoff, nindirectsize)) |pos| { + return pos; } + // TODO Handle more dynamic symbol table sections. 
+ } - if (self.symtab_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const symtab = self.load_commands.items[idx].Symtab; - // Symbol table - const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); - if (checkForCollision(start, end, symtab.symoff, symsize)) |pos| { - return pos; - } - // String table - if (checkForCollision(start, end, symtab.stroff, symtab.strsize)) |pos| { - return pos; - } + if (self.symtab_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const symtab = self.load_commands.items[idx].Symtab; + // Symbol table + const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); + if (checkForCollision(start, end, symtab.symoff, symsize)) |pos| { + return pos; } - } else { - for (segment.sections.items) |section| { - if (checkForCollision(start, end, section.offset, section.size)) |pos| { - return pos; - } + // String table + if (checkForCollision(start, end, symtab.stroff, symtab.strsize)) |pos| { + return pos; } } return null; } -fn findFreeSpace(self: *MachO, segment: *const SegmentCommand, object_size: u64, min_alignment: u16) u64 { - var start: u64 = if (parseAndCmpName(&segment.inner.segname, "__TEXT")) - self.header_pad - else - segment.inner.fileoff; - while (self.detectAllocCollision(segment, start, object_size)) |item_end| { +fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16) u64 { + const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + var start: u64 = linkedit.inner.fileoff; + while (self.detectAllocCollisionLinkedit(start, object_size)) |item_end| { start = mem.alignForwardGeneric(u64, item_end, min_alignment); } return start; } /// Saturating multiplication -fn satMul(a: anytype, b: anytype) @TypeOf(a, b) { +pub fn satMul(a: anytype, b: anytype) @TypeOf(a, b) { const T = @TypeOf(a, b); return std.math.mul(T, a, b) catch std.math.maxInt(T); } @@ -1993,9 +1972,9 @@ fn relocateSymbolTable(self: *MachO) !void { if 
(symtab.nsyms < nsyms) { const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const needed_size = nsyms * @sizeOf(macho.nlist_64); - if (needed_size > self.allocatedSize(&linkedit_segment, symtab.symoff)) { + if (needed_size > self.allocatedSizeLinkedit(symtab.symoff)) { // Move the entire symbol table to a new location - const new_symoff = self.findFreeSpace(&linkedit_segment, needed_size, @alignOf(macho.nlist_64)); + const new_symoff = self.findFreeSpaceLinkedit(needed_size, @alignOf(macho.nlist_64)); const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); log.debug("relocating symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ @@ -2140,12 +2119,12 @@ fn writeExportTrie(self: *MachO) !void { const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSize(&linkedit_segment, dyld_info.export_off); + const allocated_size = self.allocatedSizeLinkedit(dyld_info.export_off); const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); if (needed_size > allocated_size) { dyld_info.export_off = 0; - dyld_info.export_off = @intCast(u32, self.findFreeSpace(&linkedit_segment, needed_size, 1)); + dyld_info.export_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1)); } dyld_info.export_size = @intCast(u32, needed_size); log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); @@ -2170,12 +2149,12 @@ fn writeBindingInfoTable(self: *MachO) !void { const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSize(&linkedit_segment, dyld_info.bind_off); + const allocated_size = self.allocatedSizeLinkedit(dyld_info.bind_off); 
const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); if (needed_size > allocated_size) { dyld_info.bind_off = 0; - dyld_info.bind_off = @intCast(u32, self.findFreeSpace(&linkedit_segment, needed_size, 1)); + dyld_info.bind_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1)); } dyld_info.bind_size = @intCast(u32, needed_size); @@ -2198,12 +2177,12 @@ fn writeLazyBindingInfoTable(self: *MachO) !void { const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSize(&linkedit_segment, dyld_info.lazy_bind_off); + const allocated_size = self.allocatedSizeLinkedit(dyld_info.lazy_bind_off); const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); if (needed_size > allocated_size) { dyld_info.lazy_bind_off = 0; - dyld_info.lazy_bind_off = @intCast(u32, self.findFreeSpace(&linkedit_segment, needed_size, 1)); + dyld_info.lazy_bind_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1)); } dyld_info.lazy_bind_size = @intCast(u32, needed_size); @@ -2222,12 +2201,12 @@ fn writeStringTable(self: *MachO) !void { const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const allocated_size = self.allocatedSize(&linkedit_segment, symtab.stroff); + const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); const needed_size = mem.alignForwardGeneric(u64, self.string_table.items.len, @alignOf(u64)); if (needed_size > allocated_size) { symtab.strsize = 0; - symtab.stroff = @intCast(u32, self.findFreeSpace(&linkedit_segment, needed_size, 1)); + symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1)); } symtab.strsize = @intCast(u32, needed_size); log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff 
+ symtab.strsize }); @@ -2280,7 +2259,7 @@ fn updateLinkeditSegmentSizes(self: *MachO) !void { const filesize = final_offset - linkedit_segment.inner.fileoff; linkedit_segment.inner.filesize = filesize; linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, filesize, self.page_size); - try self.base.file.?.pwriteAll(&[_]u8{ 0 }, final_offset); + try self.base.file.?.pwriteAll(&[_]u8{0}, final_offset); self.load_commands_dirty = true; } @@ -2301,7 +2280,7 @@ fn writeLoadCommands(self: *MachO) !void { } const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} load commands from 0x{x} to 0x{x}", .{self.load_commands.items.len, off, off + sizeofcmds}); + log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); try self.base.file.?.pwriteAll(buffer, off); self.load_commands_dirty = false; } diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index 2fa1f867f5..27f67c8bcd 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -7,7 +7,11 @@ const macho = std.macho; const testing = std.testing; const Allocator = std.mem.Allocator; -const makeStaticString = @import("../MachO.zig").makeStaticString; +const MachO = @import("../MachO.zig"); +const makeStaticString = MachO.makeStaticString; +const satMul = MachO.satMul; +const alloc_num = MachO.alloc_num; +const alloc_den = MachO.alloc_den; pub const LoadCommand = union(enum) { Segment: SegmentCommand, @@ -188,6 +192,35 @@ pub const SegmentCommand = struct { self.sections.deinit(alloc); } + pub fn allocatedSize(self: SegmentCommand, start: u64) u64 { + assert(start > 0); + var min_pos: u64 = std.math.maxInt(u64); + for (self.sections.items) |section| { + if (section.offset > start and section.offset < min_pos) min_pos = section.offset; + } + return min_pos - start; + } + + fn detectAllocCollision(self: SegmentCommand, start: u64, size: u64) ?u64 { + const end = start + satMul(size, alloc_num) / alloc_den; + for 
(self.sections.items) |section| { + const increased_size = satMul(section.size, alloc_num) / alloc_den; + const test_end = section.offset + increased_size; + if (end > section.offset and start < test_end) { + return test_end; + } + } + return null; + } + + pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u16, start: ?u64) u64 { + var st: u64 = if (start) |v| v else self.inner.fileoff; + while (self.detectAllocCollision(st, object_size)) |item_end| { + st = mem.alignForwardGeneric(u64, item_end, min_alignment); + } + return st; + } + fn eql(self: SegmentCommand, other: SegmentCommand) bool { if (!meta.eql(self.inner, other.inner)) return false; const lhs = self.sections.items; From 0ff56e8bb14273fa8abe4503855e9f53d699c8cd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 26 Dec 2020 21:16:53 +0100 Subject: [PATCH 02/17] macho: add and populate UUID load command --- src/link/MachO.zig | 17 +++++++++++++++++ src/link/MachO/commands.zig | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 04702807b7..5f35b26f22 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -74,6 +74,8 @@ main_cmd_index: ?u16 = null, version_min_cmd_index: ?u16 = null, /// Source version source_version_cmd_index: ?u16 = null, +/// UUID load command +uuid_cmd_index: ?u16 = null, /// Code signature code_signature_cmd_index: ?u16 = null, @@ -1609,6 +1611,18 @@ pub fn populateMissingMetadata(self: *MachO) !void { self.header_dirty = true; self.load_commands_dirty = true; } + if (self.uuid_cmd_index == null) { + self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); + var uuid_cmd: macho.uuid_command = .{ + .cmd = macho.LC_UUID, + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_cmd.uuid); + try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); + self.header_dirty = true; + self.load_commands_dirty = true; + } if 
(self.code_signature_cmd_index == null) { self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(self.base.allocator, .{ @@ -2347,6 +2361,9 @@ fn parseFromFile(self: *MachO, file: fs.File) !void { macho.LC_SOURCE_VERSION => { self.source_version_cmd_index = i; }, + macho.LC_UUID => { + self.uuid_cmd_index = i; + }, macho.LC_MAIN => { self.main_cmd_index = i; }, diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index 27f67c8bcd..d0b0e76ec2 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -23,6 +23,7 @@ pub const LoadCommand = union(enum) { Main: macho.entry_point_command, VersionMin: macho.version_min_command, SourceVersion: macho.source_version_command, + Uuid: macho.uuid_command, LinkeditData: macho.linkedit_data_command, Unknown: GenericCommandWithData(macho.load_command), @@ -62,6 +63,9 @@ pub const LoadCommand = union(enum) { macho.LC_SOURCE_VERSION => LoadCommand{ .SourceVersion = try stream.reader().readStruct(macho.source_version_command), }, + macho.LC_UUID => LoadCommand{ + .Uuid = try stream.reader().readStruct(macho.uuid_command), + }, macho.LC_FUNCTION_STARTS, macho.LC_DATA_IN_CODE, macho.LC_CODE_SIGNATURE => LoadCommand{ .LinkeditData = try stream.reader().readStruct(macho.linkedit_data_command), }, @@ -79,6 +83,7 @@ pub const LoadCommand = union(enum) { .Main => |x| writeStruct(x, writer), .VersionMin => |x| writeStruct(x, writer), .SourceVersion => |x| writeStruct(x, writer), + .Uuid => |x| writeStruct(x, writer), .LinkeditData => |x| writeStruct(x, writer), .Segment => |x| x.write(writer), .Dylinker => |x| x.write(writer), @@ -95,6 +100,7 @@ pub const LoadCommand = union(enum) { .Main => |x| x.cmd, .VersionMin => |x| x.cmd, .SourceVersion => |x| x.cmd, + .Uuid => |x| x.cmd, .LinkeditData => |x| x.cmd, .Segment => |x| x.inner.cmd, .Dylinker => |x| x.inner.cmd, @@ -112,6 +118,7 @@ pub const LoadCommand = union(enum) { .VersionMin => |x| x.cmdsize, 
.SourceVersion => |x| x.cmdsize, .LinkeditData => |x| x.cmdsize, + .Uuid => |x| x.cmdsize, .Segment => |x| x.inner.cmdsize, .Dylinker => |x| x.inner.cmdsize, .Dylib => |x| x.inner.cmdsize, @@ -142,6 +149,7 @@ pub const LoadCommand = union(enum) { .Main => |x| meta.eql(x, other.Main), .VersionMin => |x| meta.eql(x, other.VersionMin), .SourceVersion => |x| meta.eql(x, other.SourceVersion), + .Uuid => |x| meta.eql(x, other.Uuid), .LinkeditData => |x| meta.eql(x, other.LinkeditData), .Segment => |x| x.eql(other.Segment), .Dylinker => |x| x.eql(other.Dylinker), From bd99a87dc224b7d84e764fc2a8a8f4e3068078b3 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Dec 2020 09:30:03 +0100 Subject: [PATCH 03/17] macho: create dSym bundle next to final artefact macOS requires the debug symbols to either be part of the intermediate object file `whatever.o` or a companion `whatever.dSym` bundle. The former case seems ill-suited for our needs since it subscribes to the old-fashioned compilation strategy using intermediate compilation units; the latter is what we need. However, on macOS the debug symbols, unlike in ELF, are not part of the final artefact; rather, they sit next to it in their own Mach-O file. 
--- src/link/MachO.zig | 22 ++++++++++++++++++ src/link/MachO/DebugSymbols.zig | 40 +++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 src/link/MachO/DebugSymbols.zig diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 5f35b26f22..5d5a5d76b2 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3,6 +3,7 @@ const MachO = @This(); const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; +const fmt = std.fmt; const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; @@ -21,6 +22,7 @@ const File = link.File; const Cache = @import("../Cache.zig"); const target_util = @import("../target.zig"); +const DebugSymbols = @import("MachO/DebugSymbols.zig"); const Trie = @import("MachO/Trie.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); @@ -31,6 +33,9 @@ pub const base_tag: File.Tag = File.Tag.macho; base: File, +/// Debug symbols bundle (or dSym). +d_sym: ?DebugSymbols = null, + /// Page size is dependent on the target cpu architecture. /// For x86_64 that's 4KB, whereas for aarch64, that's 16KB. page_size: u16, @@ -264,6 +269,20 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio self.base.file = file; + // Create dSym bundle. + const d_sym_path = try fmt.allocPrint(allocator, "{}.dSym/Contents/Resources/DWARF/", .{sub_path}); + defer allocator.free(d_sym_path); + var d_sym_bundle = try options.emit.?.directory.handle.makeOpenPath(d_sym_path, .{}); + defer d_sym_bundle.close(); + const d_sym_file = try d_sym_bundle.createFile(sub_path, .{ + .truncate = false, + .read = true, + }); + self.d_sym = .{ + .base = self, + .file = d_sym_file, + }; + // Index 0 is always a null symbol. 
try self.local_symbols.append(allocator, .{ .n_strx = 0, @@ -943,6 +962,9 @@ fn darwinArchString(arch: std.Target.Cpu.Arch) []const u8 { } pub fn deinit(self: *MachO) void { + if (self.d_sym) |*ds| { + ds.deinit(self.base.allocator); + } self.binding_info_table.deinit(self.base.allocator); self.lazy_binding_info_table.deinit(self.base.allocator); self.pie_fixups.deinit(self.base.allocator); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig new file mode 100644 index 0000000000..1e30bdf564 --- /dev/null +++ b/src/link/MachO/DebugSymbols.zig @@ -0,0 +1,40 @@ +const DebugSymbols = @This(); + +const std = @import("std"); +const fs = std.fs; +const macho = std.macho; +const mem = std.mem; +const DW = std.dwarf; +const leb = std.leb; +const Allocator = mem.Allocator; + +const MachO = @import("../MachO.zig"); + +usingnamespace @import("commands.zig"); + +base: *MachO, +file: fs.File, + +/// Mach header +header: ?macho.mach_header_64 = null, + +/// Table of all load commands +load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, +/// __PAGEZERO segment +pagezero_segment_cmd_index: ?u16 = null, +/// __TEXT segment +text_segment_cmd_index: ?u16 = null, +/// __DWARF segment +dwarf_segment_cmd_index: ?u16 = null, +/// __DATA segment +data_segment_cmd_index: ?u16 = null, +/// __LINKEDIT segment +linkedit_segment_cmd_index: ?u16 = null, +/// Symbol table +symtab_cmd_index: ?u16 = null, +/// UUID load command +uuid_cmd_index: ?u16 = null, + +pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { + self.file.close(); +} From cf9434191004e57e1cd3a75dcd91d5ad7e378c63 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Dec 2020 09:47:56 +0100 Subject: [PATCH 04/17] macho: write Mach-O dSym header --- src/link/MachO.zig | 6 +++++ src/link/MachO/DebugSymbols.zig | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 5d5a5d76b2..c8449fbe1a 100644 --- 
a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -299,6 +299,7 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio } try self.populateMissingMetadata(); + try self.d_sym.?.populateMissingMetadata(allocator); return self; } @@ -352,6 +353,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { try self.writeStringTable(); try self.updateLinkeditSegmentSizes(); + if (self.d_sym) |*ds| { + // Flush debug symbols bundle. + try ds.flush(); + } + if (target.cpu.arch == .aarch64) { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 1e30bdf564..b485e391af 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -1,7 +1,9 @@ const DebugSymbols = @This(); const std = @import("std"); +const assert = std.debug.assert; const fs = std.fs; +const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; const DW = std.dwarf; @@ -35,6 +37,49 @@ symtab_cmd_index: ?u16 = null, /// UUID load command uuid_cmd_index: ?u16 = null, +header_dirty: bool = false, +load_commands_dirty: bool = false, + +/// You must call this function *after* `MachO.populateMissingMetadata()` +/// has been called to get a viable debug symbols output. +pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void { + if (self.header == null) { + const base_header = self.base.header.?; + var header: macho.mach_header_64 = undefined; + header.magic = macho.MH_MAGIC_64; + header.cputype = base_header.cputype; + header.cpusubtype = base_header.cpusubtype; + header.filetype = macho.MH_DSYM; + // These will get populated at the end of flushing the results to file. 
+ header.ncmds = 0; + header.sizeofcmds = 0; + header.flags = 0; + header.reserved = 0; + self.header = header; + self.header_dirty = true; + } +} + +pub fn flush(self: *DebugSymbols) !void { + try self.writeHeader(); + assert(!self.header_dirty); + assert(!self.load_commands_dirty); +} + pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { self.file.close(); } + +fn writeHeader(self: *DebugSymbols) !void { + if (!self.header_dirty) return; + + self.header.?.ncmds = @intCast(u32, self.load_commands.items.len); + var sizeofcmds: u32 = 0; + for (self.load_commands.items) |cmd| { + sizeofcmds += cmd.cmdsize(); + } + self.header.?.sizeofcmds = sizeofcmds; + log.debug("writing Mach-O dSym header {}", .{self.header.?}); + try self.file.pwriteAll(mem.asBytes(&self.header.?), 0); + self.header_dirty = false; +} From a7bae1b8579475eaf4a25907405d85b98d29977d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Dec 2020 10:23:44 +0100 Subject: [PATCH 05/17] macho: write matching UUID to dSym bundle --- src/link/MachO.zig | 2 +- src/link/MachO/DebugSymbols.zig | 35 ++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c8449fbe1a..510d7b74fb 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -355,7 +355,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { if (self.d_sym) |*ds| { // Flush debug symbols bundle. 
- try ds.flush(); + try ds.flush(self.base.allocator); } if (target.cpu.arch == .aarch64) { diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index b485e391af..3882eaca69 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -58,18 +58,51 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void self.header = header; self.header_dirty = true; } + if (self.uuid_cmd_index == null) { + const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?]; + self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(allocator, base_cmd); + self.header_dirty = true; + self.load_commands_dirty = true; + } } -pub fn flush(self: *DebugSymbols) !void { +pub fn flush(self: *DebugSymbols, allocator: *Allocator) !void { + try self.writeLoadCommands(allocator); try self.writeHeader(); assert(!self.header_dirty); assert(!self.load_commands_dirty); } pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { + for (self.load_commands.items) |*lc| { + lc.deinit(allocator); + } self.file.close(); } +/// Writes all load commands and section headers. 
+fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void { + if (!self.load_commands_dirty) return; + + var sizeofcmds: usize = 0; + for (self.load_commands.items) |lc| { + sizeofcmds += lc.cmdsize(); + } + + var buffer = try allocator.alloc(u8, sizeofcmds); + defer allocator.free(buffer); + var writer = std.io.fixedBufferStream(buffer).writer(); + for (self.load_commands.items) |lc| { + try lc.write(writer); + } + + const off = @sizeOf(macho.mach_header_64); + log.debug("writing {} dSym load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); + try self.file.pwriteAll(buffer, off); + self.load_commands_dirty = false; +} + fn writeHeader(self: *DebugSymbols) !void { if (!self.header_dirty) return; From d9ce7a021bfeca7ba4b4e478617b4da590264a99 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Dec 2020 10:59:54 +0100 Subject: [PATCH 06/17] macho: copy snapshots of segment commands --- src/link/MachO.zig | 10 ++++- src/link/MachO/DebugSymbols.zig | 70 ++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 510d7b74fb..90c4757979 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1427,7 +1427,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { .addr = text_segment.inner.vmaddr + off, .size = needed_size, .offset = @intCast(u32, off), - .@"align" = @sizeOf(u64), + .@"align" = 3, // 2^3 = @sizeOf(u64) .reloff = 0, .nreloc = 0, .flags = flags, @@ -1749,8 +1749,14 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, self.last_text_block = text_block; text_section.size = needed_size; - self.load_commands_dirty = true; // TODO Make more granular.
+ + if (self.d_sym) |*ds| { + const debug_text_seg = &ds.load_commands.items[ds.text_segment_cmd_index.?].Segment; + const debug_text_sect = &debug_text_seg.sections.items[ds.text_section_index.?]; + debug_text_sect.size = needed_size; + ds.load_commands_dirty = true; + } } text_block.size = new_block_size; diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 3882eaca69..22d7e04fef 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -26,17 +26,20 @@ load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, pagezero_segment_cmd_index: ?u16 = null, /// __TEXT segment text_segment_cmd_index: ?u16 = null, -/// __DWARF segment -dwarf_segment_cmd_index: ?u16 = null, /// __DATA segment data_segment_cmd_index: ?u16 = null, /// __LINKEDIT segment linkedit_segment_cmd_index: ?u16 = null, +/// __DWARF segment +dwarf_segment_cmd_index: ?u16 = null, /// Symbol table symtab_cmd_index: ?u16 = null, /// UUID load command uuid_cmd_index: ?u16 = null, +/// Index into __TEXT,__text section. 
+text_section_index: ?u16 = null, + header_dirty: bool = false, load_commands_dirty: bool = false, @@ -58,6 +61,22 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void self.header = header; self.header_dirty = true; } + if (self.pagezero_segment_cmd_index == null) { + self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].Segment; + try self.copySegmentCommand(allocator, base_cmd); + } + if (self.text_segment_cmd_index == null) { + self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].Segment; + try self.copySegmentCommand(allocator, base_cmd); + } + if (self.data_segment_cmd_index == null) outer: { + if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional + self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const base_cmd = self.base.load_commands.items[self.base.data_segment_cmd_index.?].Segment; + try self.copySegmentCommand(allocator, base_cmd); + } if (self.uuid_cmd_index == null) { const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?]; self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -81,6 +100,53 @@ pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { self.file.close(); } +fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: SegmentCommand) !void { + var cmd = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = base_cmd.inner.cmdsize, + .segname = undefined, + .vmaddr = base_cmd.inner.vmaddr, + .vmsize = base_cmd.inner.vmsize, + .fileoff = 0, + .filesize = 0, + .maxprot = base_cmd.inner.maxprot, + .initprot = base_cmd.inner.initprot, + .nsects = base_cmd.inner.nsects, + .flags = base_cmd.inner.flags, + }); + mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname); + + 
try cmd.sections.ensureCapacity(allocator, cmd.inner.nsects); + for (base_cmd.sections.items) |base_sect, i| { + var sect = macho.section_64{ + .sectname = undefined, + .segname = undefined, + .addr = base_sect.addr, + .size = base_sect.size, + .offset = 0, + .@"align" = base_sect.@"align", + .reloff = 0, + .nreloc = 0, + .flags = base_sect.flags, + .reserved1 = base_sect.reserved1, + .reserved2 = base_sect.reserved2, + .reserved3 = base_sect.reserved3, + }; + mem.copy(u8, §.sectname, &base_sect.sectname); + mem.copy(u8, §.segname, &base_sect.segname); + + if (self.base.text_section_index.? == i) { + self.text_section_index = @intCast(u16, i); + } + + cmd.sections.appendAssumeCapacity(sect); + } + + try self.load_commands.append(allocator, .{ .Segment = cmd }); + self.header_dirty = true; + self.load_commands_dirty = true; +} + /// Writes all load commands and section headers. fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void { if (!self.load_commands_dirty) return; From 3174508903ed0cfdd06a395dd19b6fc2cbc395ed Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Dec 2020 20:25:16 +0100 Subject: [PATCH 07/17] macho: write symbol and string tables to dSym --- src/link/MachO.zig | 8 +- src/link/MachO/DebugSymbols.zig | 208 +++++++++++++++++++++++++++++--- 2 files changed, 194 insertions(+), 22 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 90c4757979..c2a6e96a39 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -301,6 +301,9 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio try self.populateMissingMetadata(); try self.d_sym.?.populateMissingMetadata(allocator); + try self.writeLocalSymbol(0); + try self.d_sym.?.writeLocalSymbol(0); + return self; } @@ -1123,6 +1126,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { symbol.n_desc = 0; try self.writeLocalSymbol(decl.link.macho.local_sym_index); + try 
self.d_sym.?.writeLocalSymbol(decl.link.macho.local_sym_index); } else { const decl_name = mem.spanZ(decl.name); const name_str_index = try self.makeString(decl_name); @@ -1140,6 +1144,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { self.offset_table.items[decl.link.macho.offset_table_index] = addr; try self.writeLocalSymbol(decl.link.macho.local_sym_index); + try self.d_sym.?.writeLocalSymbol(decl.link.macho.local_sym_index); try self.writeOffsetTableEntry(decl.link.macho.offset_table_index); } @@ -1517,7 +1522,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { .strsize = @intCast(u32, strtab_size), }, }); - try self.writeLocalSymbol(0); self.header_dirty = true; self.load_commands_dirty = true; self.string_table_dirty = true; @@ -1795,6 +1799,7 @@ fn makeString(self: *MachO, bytes: []const u8) !u32 { self.string_table.appendSliceAssumeCapacity(bytes); self.string_table.appendAssumeCapacity(0); self.string_table_dirty = true; + self.d_sym.?.string_table_dirty = true; return @intCast(u32, result); } @@ -2247,7 +2252,6 @@ fn writeStringTable(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); const needed_size = mem.alignForwardGeneric(u64, self.string_table.items.len, @alignOf(u64)); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 22d7e04fef..b1fc3fc4c5 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -10,7 +10,11 @@ const DW = std.dwarf; const leb = std.leb; const Allocator = mem.Allocator; +const trace = @import("../../tracy.zig").trace; const MachO = @import("../MachO.zig"); +const satMul = MachO.satMul; +const alloc_num = MachO.alloc_num; +const alloc_den = MachO.alloc_den; usingnamespace 
@import("commands.zig"); @@ -40,8 +44,12 @@ uuid_cmd_index: ?u16 = null, /// Index into __TEXT,__text section. text_section_index: ?u16 = null, +linkedit_off: u16 = 0x1000, +linkedit_size: u16 = 0x1000, + header_dirty: bool = false, load_commands_dirty: bool = false, +string_table_dirty: bool = false, /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. @@ -61,22 +69,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void self.header = header; self.header_dirty = true; } - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].Segment; - try self.copySegmentCommand(allocator, base_cmd); - } - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].Segment; - try self.copySegmentCommand(allocator, base_cmd); - } - if (self.data_segment_cmd_index == null) outer: { - if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_segment_cmd_index.?].Segment; - try self.copySegmentCommand(allocator, base_cmd); - } if (self.uuid_cmd_index == null) { const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?]; self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -84,13 +76,79 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void self.header_dirty = true; self.load_commands_dirty = true; } + if (self.symtab_cmd_index == null) { + self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); + const base_cmd = 
self.base.load_commands.items[self.base.symtab_cmd_index.?].Symtab; + const symtab_size = base_cmd.nsyms * @sizeOf(macho.nlist_64); + const symtab_off = self.findFreeSpaceLinkedit(symtab_size, @sizeOf(macho.nlist_64)); + + log.debug("found dSym symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); + + const strtab_off = self.findFreeSpaceLinkedit(base_cmd.strsize, 1); + + log.debug("found dSym string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + base_cmd.strsize }); + + try self.load_commands.append(allocator, .{ + .Symtab = .{ + .cmd = macho.LC_SYMTAB, + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = @intCast(u32, symtab_off), + .nsyms = base_cmd.nsyms, + .stroff = @intCast(u32, strtab_off), + .strsize = base_cmd.strsize, + }, + }); + try self.writeLocalSymbol(0); + self.header_dirty = true; + self.load_commands_dirty = true; + self.string_table_dirty = true; + } + if (self.pagezero_segment_cmd_index == null) { + self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].Segment; + const cmd = try self.copySegmentCommand(allocator, base_cmd); + try self.load_commands.append(allocator, .{ .Segment = cmd }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.text_segment_cmd_index == null) { + self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].Segment; + const cmd = try self.copySegmentCommand(allocator, base_cmd); + try self.load_commands.append(allocator, .{ .Segment = cmd }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.data_segment_cmd_index == null) outer: { + if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional + self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const base_cmd = 
self.base.load_commands.items[self.base.data_segment_cmd_index.?].Segment; + const cmd = try self.copySegmentCommand(allocator, base_cmd); + try self.load_commands.append(allocator, .{ .Segment = cmd }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.linkedit_segment_cmd_index == null) { + self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const base_cmd = self.base.load_commands.items[self.base.linkedit_segment_cmd_index.?].Segment; + var cmd = try self.copySegmentCommand(allocator, base_cmd); + cmd.inner.vmsize = self.linkedit_size; + cmd.inner.fileoff = self.linkedit_off; + cmd.inner.filesize = self.linkedit_size; + try self.load_commands.append(allocator, .{ .Segment = cmd }); + self.header_dirty = true; + self.load_commands_dirty = true; + } } pub fn flush(self: *DebugSymbols, allocator: *Allocator) !void { + try self.writeStringTable(); try self.writeLoadCommands(allocator); try self.writeHeader(); + assert(!self.header_dirty); assert(!self.load_commands_dirty); + assert(!self.string_table_dirty); } pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { @@ -100,7 +158,7 @@ pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { self.file.close(); } -fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: SegmentCommand) !void { +fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: SegmentCommand) !SegmentCommand { var cmd = SegmentCommand.empty(.{ .cmd = macho.LC_SEGMENT_64, .cmdsize = base_cmd.inner.cmdsize, @@ -142,9 +200,7 @@ fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: Segm cmd.sections.appendAssumeCapacity(sect); } - try self.load_commands.append(allocator, .{ .Segment = cmd }); - self.header_dirty = true; - self.load_commands_dirty = true; + return cmd; } /// Writes all load commands and section headers. 
@@ -182,3 +238,115 @@ fn writeHeader(self: *DebugSymbols) !void { try self.file.pwriteAll(mem.asBytes(&self.header.?), 0); self.header_dirty = false; } + +fn allocatedSizeLinkedit(self: *DebugSymbols, start: u64) u64 { + assert(start > 0); + var min_pos: u64 = std.math.maxInt(u64); + + if (self.symtab_cmd_index) |idx| { + const symtab = self.load_commands.items[idx].Symtab; + if (symtab.symoff >= start and symtab.symoff < min_pos) min_pos = symtab.symoff; + if (symtab.stroff >= start and symtab.stroff < min_pos) min_pos = symtab.stroff; + } + + return min_pos - start; +} + +fn detectAllocCollisionLinkedit(self: *DebugSymbols, start: u64, size: u64) ?u64 { + const end = start + satMul(size, alloc_num) / alloc_den; + + if (self.symtab_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const symtab = self.load_commands.items[idx].Symtab; + { + // Symbol table + const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); + const increased_size = satMul(symsize, alloc_num) / alloc_den; + const test_end = symtab.symoff + increased_size; + if (end > symtab.symoff and start < test_end) { + return test_end; + } + } + { + // String table + const increased_size = satMul(symtab.strsize, alloc_num) / alloc_den; + const test_end = symtab.stroff + increased_size; + if (end > symtab.stroff and start < test_end) { + return test_end; + } + } + } + + return null; +} + +fn findFreeSpaceLinkedit(self: *DebugSymbols, object_size: u64, min_alignment: u16) u64 { + var start: u64 = self.linkedit_off; + while (self.detectAllocCollisionLinkedit(start, object_size)) |item_end| { + start = mem.alignForwardGeneric(u64, item_end, min_alignment); + } + return start; +} + +fn relocateSymbolTable(self: *DebugSymbols) !void { + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const nlocals = self.base.local_symbols.items.len; + const nglobals = self.base.global_symbols.items.len; + const nsyms = nlocals + nglobals; + + if (symtab.nsyms < 
nsyms) { + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const needed_size = nsyms * @sizeOf(macho.nlist_64); + if (needed_size > self.allocatedSizeLinkedit(symtab.symoff)) { + // Move the entire symbol table to a new location + const new_symoff = self.findFreeSpaceLinkedit(needed_size, @alignOf(macho.nlist_64)); + const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); + + assert(new_symoff + existing_size <= self.linkedit_off + self.linkedit_size); + log.debug("relocating dSym symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ + symtab.symoff, + symtab.symoff + existing_size, + new_symoff, + new_symoff + existing_size, + }); + + const amt = try self.file.copyRangeAll(symtab.symoff, self.file, new_symoff, existing_size); + if (amt != existing_size) return error.InputOutput; + symtab.symoff = @intCast(u32, new_symoff); + } + symtab.nsyms = @intCast(u32, nsyms); + self.load_commands_dirty = true; + } +} + +pub fn writeLocalSymbol(self: *DebugSymbols, index: usize) !void { + const tracy = trace(@src()); + defer tracy.end(); + try self.relocateSymbolTable(); + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; + log.debug("writing dSym local symbol {} at 0x{x}", .{ index, off }); + try self.file.pwriteAll(mem.asBytes(&self.base.local_symbols.items[index]), off); +} + +pub fn writeStringTable(self: *DebugSymbols) !void { + if (!self.string_table_dirty) return; + + const tracy = trace(@src()); + defer tracy.end(); + + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); + const needed_size = mem.alignForwardGeneric(u64, self.base.string_table.items.len, @alignOf(u64)); + + if (needed_size > allocated_size) { + symtab.strsize = 0; + symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1)); + } + symtab.strsize = @intCast(u32, 
needed_size); + log.debug("writing dSym string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + + try self.file.pwriteAll(self.base.string_table.items, symtab.stroff); + self.load_commands_dirty = true; + self.string_table_dirty = false; +} From d4725cb40bf378959f254ba447a47f5e2f5726fa Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 27 Dec 2020 21:51:40 +0100 Subject: [PATCH 08/17] macho: prealloc space for debug sections in dSym --- src/link/MachO.zig | 4 + src/link/MachO/DebugSymbols.zig | 191 +++++++++++++++++++++++++++++++- 2 files changed, 192 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c2a6e96a39..6c94df7286 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -108,6 +108,9 @@ dyld_stub_binder_index: ?u16 = null, /// Table of symbol names aka the string table. string_table: std.ArrayListUnmanaged(u8) = .{}, +/// Table of debug symbol names aka the debug string table. +debug_string_table: std.ArrayListUnmanaged(u8) = .{}, + /// Table of trampolines to the actual symbols in __text section. 
offset_table: std.ArrayListUnmanaged(u64) = .{}, @@ -980,6 +983,7 @@ pub fn deinit(self: *MachO) void { self.text_block_free_list.deinit(self.base.allocator); self.offset_table.deinit(self.base.allocator); self.offset_table_free_list.deinit(self.base.allocator); + self.debug_string_table.deinit(self.base.allocator); self.string_table.deinit(self.base.allocator); self.undef_symbols.deinit(self.base.allocator); self.global_symbols.deinit(self.base.allocator); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index b1fc3fc4c5..c08c1b9bec 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -15,9 +15,12 @@ const MachO = @import("../MachO.zig"); const satMul = MachO.satMul; const alloc_num = MachO.alloc_num; const alloc_den = MachO.alloc_den; +const makeStaticString = MachO.makeStaticString; usingnamespace @import("commands.zig"); +const page_size: u16 = 0x1000; + base: *MachO, file: fs.File, @@ -44,12 +47,25 @@ uuid_cmd_index: ?u16 = null, /// Index into __TEXT,__text section. text_section_index: ?u16 = null, -linkedit_off: u16 = 0x1000, -linkedit_size: u16 = 0x1000, +linkedit_off: u16 = page_size, +linkedit_size: u16 = page_size, + +debug_info_section_index: ?u16 = null, +debug_abbrev_section_index: ?u16 = null, +debug_str_section_index: ?u16 = null, +debug_aranges_section_index: ?u16 = null, +debug_line_section_index: ?u16 = null, + +debug_abbrev_table_offset: ?u64 = null, header_dirty: bool = false, load_commands_dirty: bool = false, string_table_dirty: bool = false, +debug_string_table_dirty: bool = false, +debug_abbrev_section_dirty: bool = false, +debug_aranges_section_dirty: bool = false, +debug_info_header_dirty: bool = false, +debug_line_header_dirty: bool = false, /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
@@ -98,7 +114,6 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void .strsize = base_cmd.strsize, }, }); - try self.writeLocalSymbol(0); self.header_dirty = true; self.load_commands_dirty = true; self.string_table_dirty = true; @@ -139,6 +154,176 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void self.header_dirty = true; self.load_commands_dirty = true; } + if (self.dwarf_segment_cmd_index == null) { + self.dwarf_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + + const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const ideal_size: u16 = 200 + 128 + 160 + 250; + const needed_size = mem.alignForwardGeneric(u64, satMul(ideal_size, alloc_num) / alloc_den, page_size); + const off = linkedit.inner.fileoff + linkedit.inner.filesize; + const vmaddr = linkedit.inner.vmaddr + linkedit.inner.vmsize; + + log.debug("found dSym __DWARF segment free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try self.load_commands.append(allocator, .{ + .Segment = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__DWARF"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = off, + .filesize = needed_size, + .maxprot = 0, + .initprot = 0, + .nsects = 0, + .flags = 0, + }), + }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.debug_str_section_index == null) { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + self.debug_str_section_index = @intCast(u16, dwarf_segment.sections.items.len); + assert(self.base.debug_string_table.items.len == 0); + + const file_size_hint = 200; + const p_align = 1; + const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); + + log.debug("found dSym __debug_strtab free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + + try 
dwarf_segment.addSection(allocator, .{ + .sectname = makeStaticString("__debug_str"), + .segname = makeStaticString("__DWARF"), + .addr = dwarf_segment.inner.vmaddr + off, + .size = @intCast(u32, self.base.debug_string_table.items.len), + .offset = @intCast(u32, off), + .@"align" = 1, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + self.debug_string_table_dirty = true; + } + if (self.debug_info_section_index == null) { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + self.debug_info_section_index = @intCast(u16, dwarf_segment.sections.items.len); + + const file_size_hint = 200; + const p_align = 1; + const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); + + log.debug("found dSym __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + + try dwarf_segment.addSection(allocator, .{ + .sectname = makeStaticString("__debug_info"), + .segname = makeStaticString("__DWARF"), + .addr = dwarf_segment.inner.vmaddr + off, + .size = file_size_hint, + .offset = @intCast(u32, off), + .@"align" = p_align, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + self.debug_info_header_dirty = true; + } + if (self.debug_abbrev_section_index == null) { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + self.debug_abbrev_section_index = @intCast(u16, dwarf_segment.sections.items.len); + + const file_size_hint = 128; + const p_align = 1; + const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); + + log.debug("found dSym __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + + try dwarf_segment.addSection(allocator, .{ + .sectname = makeStaticString("__debug_abbrev"), + 
.segname = makeStaticString("__DWARF"), + .addr = dwarf_segment.inner.vmaddr + off, + .size = file_size_hint, + .offset = @intCast(u32, off), + .@"align" = p_align, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + self.debug_abbrev_section_dirty = true; + } + if (self.debug_aranges_section_index == null) { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + self.debug_aranges_section_index = @intCast(u16, dwarf_segment.sections.items.len); + + const file_size_hint = 160; + const p_align = 16; + const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); + + log.debug("found dSym __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + + try dwarf_segment.addSection(allocator, .{ + .sectname = makeStaticString("__debug_aranges"), + .segname = makeStaticString("__DWARF"), + .addr = dwarf_segment.inner.vmaddr + off, + .size = file_size_hint, + .offset = @intCast(u32, off), + .@"align" = p_align, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + self.debug_aranges_section_dirty = true; + } + if (self.debug_line_section_index == null) { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + self.debug_line_section_index = @intCast(u16, dwarf_segment.sections.items.len); + + const file_size_hint = 250; + const p_align = 1; + const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); + + log.debug("found dSym __debug_line free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + + try dwarf_segment.addSection(allocator, .{ + .sectname = makeStaticString("__debug_line"), + .segname = makeStaticString("__DWARF"), + .addr = dwarf_segment.inner.vmaddr + off, + .size = file_size_hint, + 
.offset = @intCast(u32, off), + .@"align" = p_align, + .reloff = 0, + .nreloc = 0, + .flags = macho.S_REGULAR, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + self.debug_line_header_dirty = true; + } } pub fn flush(self: *DebugSymbols, allocator: *Allocator) !void { From 2875a7335aa5363303019dae8b837036ed547d53 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Dec 2020 22:43:07 +0100 Subject: [PATCH 09/17] macho: add Elf dwarf sections --- src/link/MachO.zig | 310 ++++++++++++- src/link/MachO/DebugSymbols.zig | 744 +++++++++++++++++++++++++++++++- src/link/MachO/commands.zig | 6 +- 3 files changed, 1035 insertions(+), 25 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6c94df7286..131d06df49 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -11,9 +11,10 @@ const codegen = @import("../codegen.zig"); const aarch64 = @import("../codegen/aarch64.zig"); const math = std.math; const mem = std.mem; +const DW = std.dwarf; +const leb = std.leb; const trace = @import("../tracy.zig").trace; -const Type = @import("../type.zig").Type; const build_options = @import("build_options"); const Module = @import("../Module.zig"); const Compilation = @import("../Compilation.zig"); @@ -108,9 +109,6 @@ dyld_stub_binder_index: ?u16 = null, /// Table of symbol names aka the string table. string_table: std.ArrayListUnmanaged(u8) = .{}, -/// Table of debug symbol names aka the debug string table. -debug_string_table: std.ArrayListUnmanaged(u8) = .{}, - /// Table of trampolines to the actual symbols in __text section. offset_table: std.ArrayListUnmanaged(u64) = .{}, @@ -207,12 +205,25 @@ pub const TextBlock = struct { prev: ?*TextBlock, next: ?*TextBlock, + /// Previous/next linked list pointers. + /// This is the linked list node for this Decl's corresponding .debug_info tag.
+ dbg_info_prev: ?*TextBlock, + dbg_info_next: ?*TextBlock, + /// Offset into .debug_info pointing to the tag for this Decl. + dbg_info_off: u32, + /// Size of the .debug_info tag for this Decl, not including padding. + dbg_info_len: u32, + pub const empty = TextBlock{ .local_sym_index = 0, .offset_table_index = undefined, .size = 0, .prev = null, .next = null, + .dbg_info_prev = null, + .dbg_info_next = null, + .dbg_info_off = undefined, + .dbg_info_len = undefined, }; /// Returns how much room there is to grow in virtual address space. @@ -248,7 +259,23 @@ pub const Export = struct { }; pub const SrcFn = struct { - pub const empty = SrcFn{}; + /// Offset from the beginning of the Debug Line Program header that contains this function. + off: u32, + /// Size of the line number program component belonging to this function, not + /// including padding. + len: u32, + + /// Points to the previous and next neighbors, based on the offset from .debug_line. + /// This can be used to find, for example, the capacity of this `SrcFn`. + prev: ?*SrcFn, + next: ?*SrcFn, + + pub const empty: SrcFn = .{ + .off = 0, + .len = 0, + .prev = null, + .next = null, + }; }; pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Options) !*MachO { @@ -361,7 +388,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { if (self.d_sym) |*ds| { // Flush debug symbols bundle. 
- try ds.flush(self.base.allocator); + try ds.flushModule(self.base.allocator, self.base.options); } if (target.cpu.arch == .aarch64) { @@ -983,7 +1010,6 @@ pub fn deinit(self: *MachO) void { self.text_block_free_list.deinit(self.base.allocator); self.offset_table.deinit(self.base.allocator); self.offset_table_free_list.deinit(self.base.allocator); - self.debug_string_table.deinit(self.base.allocator); self.string_table.deinit(self.base.allocator); self.undef_symbols.deinit(self.base.allocator); self.global_symbols.deinit(self.base.allocator); @@ -1091,8 +1117,126 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); + var dbg_line_buffer = std.ArrayList(u8).init(self.base.allocator); + defer dbg_line_buffer.deinit(); + + var dbg_info_buffer = std.ArrayList(u8).init(self.base.allocator); + defer dbg_info_buffer.deinit(); + + var dbg_info_type_relocs: File.DbgInfoTypeRelocsTable = .{}; + defer { + var it = dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { + entry.value.relocs.deinit(self.base.allocator); + } + dbg_info_type_relocs.deinit(self.base.allocator); + } + const typed_value = decl.typed_value.most_recent.typed_value; - const res = try codegen.generateSymbol(&self.base, decl.src(), typed_value, &code_buffer, .none); + const is_fn: bool = switch (typed_value.ty.zigTypeTag()) { + .Fn => true, + else => false, + }; + if (is_fn) { + const zir_dumps = if (std.builtin.is_test) &[0][]const u8{} else build_options.zir_dumps; + if (zir_dumps.len != 0) { + for (zir_dumps) |fn_name| { + if (mem.eql(u8, mem.spanZ(decl.name), fn_name)) { + std.debug.print("\n{}\n", .{decl.name}); + typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); + } + } + } + + // For functions we need to add a prologue to the debug line program. 
+ try dbg_line_buffer.ensureCapacity(26); + + const line_off: u28 = blk: { + if (decl.scope.cast(Module.Scope.Container)) |container_scope| { + const tree = container_scope.file_scope.contents.tree; + const file_ast_decls = tree.root_node.decls(); + // TODO Look into improving the performance here by adding a token-index-to-line + // lookup table. Currently this involves scanning over the source code for newlines. + const fn_proto = file_ast_decls[decl.src_index].castTag(.FnProto).?; + const block = fn_proto.getBodyNode().?.castTag(.Block).?; + const line_delta = std.zig.lineDelta(tree.source, 0, tree.token_locs[block.lbrace].start); + break :blk @intCast(u28, line_delta); + } else if (decl.scope.cast(Module.Scope.ZIRModule)) |zir_module| { + const byte_off = zir_module.contents.module.decls[decl.src_index].inst.src; + const line_delta = std.zig.lineDelta(zir_module.source.bytes, 0, byte_off); + break :blk @intCast(u28, line_delta); + } else { + unreachable; + } + }; + + dbg_line_buffer.appendSliceAssumeCapacity(&[_]u8{ + DW.LNS_extended_op, + @sizeOf(u64) + 1, + DW.LNE_set_address, + }); + // This is the "relocatable" vaddr, corresponding to `code_buffer` index `0`. + assert(DebugSymbols.dbg_line_vaddr_reloc_index == dbg_line_buffer.items.len); + dbg_line_buffer.items.len += @sizeOf(u64); + + dbg_line_buffer.appendAssumeCapacity(DW.LNS_advance_line); + // This is the "relocatable" relative line offset from the previous function's end curly + // to this function's begin curly. + assert(DebugSymbols.getRelocDbgLineOff() == dbg_line_buffer.items.len); + // Here we use a ULEB128-fixed-4 to make sure this field can be overwritten later. + leb.writeUnsignedFixed(4, dbg_line_buffer.addManyAsArrayAssumeCapacity(4), line_off); + + dbg_line_buffer.appendAssumeCapacity(DW.LNS_set_file); + assert(DebugSymbols.getRelocDbgFileIndex() == dbg_line_buffer.items.len); + // Once we support more than one source file, this will have the ability to be more + // than one possible value. 
+ const file_index = 1; + leb.writeUnsignedFixed(4, dbg_line_buffer.addManyAsArrayAssumeCapacity(4), file_index); + + // Emit a line for the begin curly with prologue_end=false. The codegen will + // do the work of setting prologue_end=true and epilogue_begin=true. + dbg_line_buffer.appendAssumeCapacity(DW.LNS_copy); + + // .debug_info subprogram + const decl_name_with_null = decl.name[0 .. mem.lenZ(decl.name) + 1]; + try dbg_info_buffer.ensureCapacity(dbg_info_buffer.items.len + 25 + decl_name_with_null.len); + + const fn_ret_type = typed_value.ty.fnReturnType(); + const fn_ret_has_bits = fn_ret_type.hasCodeGenBits(); + if (fn_ret_has_bits) { + dbg_info_buffer.appendAssumeCapacity(DebugSymbols.abbrev_subprogram); + } else { + dbg_info_buffer.appendAssumeCapacity(DebugSymbols.abbrev_subprogram_retvoid); + } + // These get overwritten after generating the machine code. These values are + // "relocations" and have to be in this fixed place so that functions can be + // moved in virtual address space. 
+ assert(DebugSymbols.dbg_info_low_pc_reloc_index == dbg_info_buffer.items.len); + dbg_info_buffer.items.len += @sizeOf(u64); // DW.AT_low_pc, DW.FORM_addr + assert(DebugSymbols.getRelocDbgInfoSubprogramHighPC() == dbg_info_buffer.items.len); + dbg_info_buffer.items.len += 4; // DW.AT_high_pc, DW.FORM_data4 + if (fn_ret_has_bits) { + const gop = try dbg_info_type_relocs.getOrPut(self.base.allocator, fn_ret_type); + if (!gop.found_existing) { + gop.entry.value = .{ + .off = undefined, + .relocs = .{}, + }; + } + try gop.entry.value.relocs.append(self.base.allocator, @intCast(u32, dbg_info_buffer.items.len)); + dbg_info_buffer.items.len += 4; // DW.AT_type, DW.FORM_ref4 + } + dbg_info_buffer.appendSliceAssumeCapacity(decl_name_with_null); // DW.AT_name, DW.FORM_string + } else { + // TODO implement .debug_info for global variables + } + const res = try codegen.generateSymbol(&self.base, decl.src(), typed_value, &code_buffer, .{ + .dwarf = .{ + .dbg_line = &dbg_line_buffer, + .dbg_info = &dbg_info_buffer, + .dbg_info_type_relocs = &dbg_info_type_relocs, + }, + }); const code = switch (res) { .externally_managed => |x| x, @@ -1178,12 +1322,160 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const file_offset = text_section.offset + section_offset; try self.base.file.?.pwriteAll(code, file_offset); + const text_block = &decl.link.macho; + // If the Decl is a function, we need to update the __debug_line program. + if (is_fn) { + // Perform the relocations based on vaddr. 
+ { + const ptr = dbg_line_buffer.items[DebugSymbols.dbg_line_vaddr_reloc_index..][0..8]; + mem.writeIntLittle(u64, ptr, symbol.n_value); + } + { + const ptr = dbg_info_buffer.items[DebugSymbols.dbg_info_low_pc_reloc_index..][0..8]; + mem.writeIntLittle(u64, ptr, symbol.n_value); + } + { + const ptr = dbg_info_buffer.items[DebugSymbols.getRelocDbgInfoSubprogramHighPC()..][0..4]; + mem.writeIntLittle(u32, ptr, @intCast(u32, text_block.size)); + } + + try dbg_line_buffer.appendSlice(&[_]u8{ DW.LNS_extended_op, 1, DW.LNE_end_sequence }); + + // Now we have the full contents and may allocate a region to store it. + + // This logic is nearly identical to the logic below in `updateDeclDebugInfo` for + // `TextBlock` and the .debug_info. If you are editing this logic, you + // probably need to edit that logic too. + + const dwarf_segment = &self.d_sym.?.load_commands.items[self.d_sym.?.dwarf_segment_cmd_index.?].Segment; + const debug_line_sect = &dwarf_segment.sections.items[self.d_sym.?.debug_line_section_index.?]; + const src_fn = &decl.fn_link.macho; + src_fn.len = @intCast(u32, dbg_line_buffer.items.len); + if (self.d_sym.?.dbg_line_fn_last) |last| { + if (src_fn.next) |next| { + // Update existing function - non-last item. + if (src_fn.off + src_fn.len + DebugSymbols.min_nop_size > next.off) { + // It grew too big, so we move it to a new location. + if (src_fn.prev) |prev| { + _ = self.d_sym.?.dbg_line_fn_free_list.put(self.base.allocator, prev, {}) catch {}; + prev.next = src_fn.next; + } + next.prev = src_fn.prev; + src_fn.next = null; + // Populate where it used to be with NOPs. + const file_pos = debug_line_sect.offset + src_fn.off; + try self.d_sym.?.pwriteDbgLineNops(0, &[0]u8{}, src_fn.len, file_pos); + // TODO Look at the free list before appending at the end. 
+ src_fn.prev = last; + last.next = src_fn; + self.d_sym.?.dbg_line_fn_last = src_fn; + + src_fn.off = last.off + (last.len * alloc_num / alloc_den); + } + } else if (src_fn.prev == null) { + // Append new function. + // TODO Look at the free list before appending at the end. + src_fn.prev = last; + last.next = src_fn; + self.d_sym.?.dbg_line_fn_last = src_fn; + + src_fn.off = last.off + (last.len * alloc_num / alloc_den); + } + } else { + // This is the first function of the Line Number Program. + self.d_sym.?.dbg_line_fn_first = src_fn; + self.d_sym.?.dbg_line_fn_last = src_fn; + + src_fn.off = self.d_sym.?.dbgLineNeededHeaderBytes(module) * alloc_num / alloc_den; + } + + const last_src_fn = self.d_sym.?.dbg_line_fn_last.?; + const needed_size = last_src_fn.off + last_src_fn.len; + if (needed_size != debug_line_sect.size) { + if (needed_size > dwarf_segment.allocatedSize(debug_line_sect.offset)) { + const new_offset = dwarf_segment.findFreeSpace(needed_size, 1, null); + const existing_size = last_src_fn.off; + + assert(dwarf_segment.inner.fileoff + dwarf_segment.inner.filesize >= new_offset + needed_size); + + log.debug("moving __zdebug_line section: {} bytes from 0x{x} to 0x{x}", .{ + existing_size, + debug_line_sect.offset, + new_offset, + }); + + const amt = try self.d_sym.?.file.copyRangeAll(debug_line_sect.offset, self.d_sym.?.file, new_offset, existing_size); + if (amt != existing_size) return error.InputOutput; + debug_line_sect.offset = @intCast(u32, new_offset); + debug_line_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + } + debug_line_sect.size = needed_size; + self.d_sym.?.load_commands_dirty = true; // TODO look into making only the one section dirty + self.d_sym.?.debug_line_header_dirty = true; + } + const prev_padding_size: u32 = if (src_fn.prev) |prev| src_fn.off - (prev.off + prev.len) else 0; + const next_padding_size: u32 = if (src_fn.next) |next| next.off - (src_fn.off + src_fn.len) else 0; + + // We only 
have support for one compilation unit so far, so the offsets are directly + // from the .debug_line section. + const file_pos = debug_line_sect.offset + src_fn.off; + try self.d_sym.?.pwriteDbgLineNops(prev_padding_size, dbg_line_buffer.items, next_padding_size, file_pos); + + // .debug_info - End the TAG_subprogram children. + try dbg_info_buffer.append(0); + } + + // Now we emit the .debug_info types of the Decl. These will count towards the size of + // the buffer, so we have to do it before computing the offset, and we can't perform the actual + // relocations yet. + var it = dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { + entry.value.off = @intCast(u32, dbg_info_buffer.items.len); + try self.d_sym.?.addDbgInfoType(entry.key, &dbg_info_buffer, self.base.options.target); + } + + try self.d_sym.?.updateDeclDebugInfoAllocation(self.base.allocator, text_block, @intCast(u32, dbg_info_buffer.items.len)); + + // Now that we have the offset assigned we can finally perform type relocations. + it = dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { + for (entry.value.relocs.items) |off| { + mem.writeIntLittle( + u32, + dbg_info_buffer.items[off..][0..4], + text_block.dbg_info_off + entry.value.off, + ); + } + } + + try self.d_sym.?.writeDeclDebugInfo(text_block, dbg_info_buffer.items); + // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. 
const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; try self.updateDeclExports(module, decl, decl_exports); } -pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {} +pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const container_scope = decl.scope.cast(Module.Scope.Container).?; + const tree = container_scope.file_scope.contents.tree; + const file_ast_decls = tree.root_node.decls(); + // TODO Look into improving the performance here by adding a token-index-to-line + // lookup table. Currently this involves scanning over the source code for newlines. + const fn_proto = file_ast_decls[decl.src_index].castTag(.FnProto).?; + const block = fn_proto.getBodyNode().?.castTag(.Block).?; + const line_delta = std.zig.lineDelta(tree.source, 0, tree.token_locs[block.lbrace].start); + const casted_line_off = @intCast(u28, line_delta); + + const dwarf_segment = &self.d_sym.?.load_commands.items[self.d_sym.?.dwarf_segment_cmd_index.?].Segment; + const shdr = &dwarf_segment.sections.items[self.d_sym.?.debug_line_section_index.?]; + const file_pos = shdr.offset + decl.fn_link.macho.off + DebugSymbols.getRelocDbgLineOff(); + var data: [4]u8 = undefined; + leb.writeUnsignedFixed(4, &data, casted_line_off); + try self.d_sym.?.file.pwriteAll(&data, file_pos); +} pub fn updateDeclExports( self: *MachO, diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index c08c1b9bec..349ecd9d30 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -11,7 +11,12 @@ const leb = std.leb; const Allocator = mem.Allocator; const trace = @import("../../tracy.zig").trace; +const Module = @import("../../Module.zig"); +const Type = @import("../../type.zig").Type; +const link = @import("../../link.zig"); const MachO = @import("../MachO.zig"); +const SrcFn = MachO.SrcFn; +const TextBlock 
= MachO.TextBlock; const satMul = MachO.satMul; const alloc_num = MachO.alloc_num; const alloc_den = MachO.alloc_den; @@ -58,6 +63,21 @@ debug_line_section_index: ?u16 = null, debug_abbrev_table_offset: ?u64 = null, +/// A list of `SrcFn` whose Line Number Programs have surplus capacity. +/// This is the same concept as `text_block_free_list`; see those doc comments. +dbg_line_fn_free_list: std.AutoHashMapUnmanaged(*SrcFn, void) = .{}, +dbg_line_fn_first: ?*SrcFn = null, +dbg_line_fn_last: ?*SrcFn = null, + +/// A list of `TextBlock` whose corresponding .debug_info tags have surplus capacity. +/// This is the same concept as `text_block_free_list`; see those doc comments. +dbg_info_decl_free_list: std.AutoHashMapUnmanaged(*TextBlock, void) = .{}, +dbg_info_decl_first: ?*TextBlock = null, +dbg_info_decl_last: ?*TextBlock = null, + +/// Table of debug symbol names aka the debug string table. +debug_string_table: std.ArrayListUnmanaged(u8) = .{}, + header_dirty: bool = false, load_commands_dirty: bool = false, string_table_dirty: bool = false, @@ -67,6 +87,13 @@ debug_aranges_section_dirty: bool = false, debug_info_header_dirty: bool = false, debug_line_header_dirty: bool = false, +pub const abbrev_compile_unit = 1; +pub const abbrev_subprogram = 2; +pub const abbrev_subprogram_retvoid = 3; +pub const abbrev_base_type = 4; +pub const abbrev_pad1 = 5; +pub const abbrev_parameter = 6; + /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void { @@ -186,20 +213,14 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void if (self.debug_str_section_index == null) { const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; self.debug_str_section_index = @intCast(u16, dwarf_segment.sections.items.len); - assert(self.base.debug_string_table.items.len == 0); - - const file_size_hint = 200; - const p_align = 1; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - - log.debug("found dSym __debug_strtab free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); + assert(self.debug_string_table.items.len == 0); try dwarf_segment.addSection(allocator, .{ .sectname = makeStaticString("__debug_str"), .segname = makeStaticString("__DWARF"), - .addr = dwarf_segment.inner.vmaddr + off, - .size = @intCast(u32, self.base.debug_string_table.items.len), - .offset = @intCast(u32, off), + .addr = dwarf_segment.inner.vmaddr, + .size = @intCast(u32, self.debug_string_table.items.len), + .offset = @intCast(u32, dwarf_segment.inner.fileoff), .@"align" = 1, .reloff = 0, .nreloc = 0, @@ -225,7 +246,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void try dwarf_segment.addSection(allocator, .{ .sectname = makeStaticString("__debug_info"), .segname = makeStaticString("__DWARF"), - .addr = dwarf_segment.inner.vmaddr + off, + .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, .size = file_size_hint, .offset = @intCast(u32, off), .@"align" = p_align, @@ -253,7 +274,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void try dwarf_segment.addSection(allocator, .{ .sectname = makeStaticString("__debug_abbrev"), .segname = makeStaticString("__DWARF"), - .addr = dwarf_segment.inner.vmaddr + off, + .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, .size = file_size_hint, 
.offset = @intCast(u32, off), .@"align" = p_align, @@ -281,7 +302,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void try dwarf_segment.addSection(allocator, .{ .sectname = makeStaticString("__debug_aranges"), .segname = makeStaticString("__DWARF"), - .addr = dwarf_segment.inner.vmaddr + off, + .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, .size = file_size_hint, .offset = @intCast(u32, off), .@"align" = p_align, @@ -309,7 +330,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void try dwarf_segment.addSection(allocator, .{ .sectname = makeStaticString("__debug_line"), .segname = makeStaticString("__DWARF"), - .addr = dwarf_segment.inner.vmaddr + off, + .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, .size = file_size_hint, .offset = @intCast(u32, off), .@"align" = p_align, @@ -326,17 +347,346 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void } } -pub fn flush(self: *DebugSymbols, allocator: *Allocator) !void { +pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Options) !void { + // TODO This linker code currently assumes there is only 1 compilation unit and it corresponds to the + // Zig source code. + const module = options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + const init_len_size: usize = 12; + + if (self.debug_abbrev_section_dirty) { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const debug_abbrev_sect = &dwarf_segment.sections.items[self.debug_abbrev_section_index.?]; + + // These are LEB encoded but since the values are all less than 127 + // we can simply append these bytes. 
+ const abbrev_buf = [_]u8{ + abbrev_compile_unit, DW.TAG_compile_unit, DW.CHILDREN_yes, // header + DW.AT_stmt_list, DW.FORM_sec_offset, DW.AT_low_pc, + DW.FORM_addr, DW.AT_high_pc, DW.FORM_addr, + DW.AT_name, DW.FORM_strp, DW.AT_comp_dir, + DW.FORM_strp, DW.AT_producer, DW.FORM_strp, + DW.AT_language, DW.FORM_data2, 0, + 0, // table sentinel + abbrev_subprogram, + DW.TAG_subprogram, + DW.CHILDREN_yes, // header + DW.AT_low_pc, + DW.FORM_addr, + DW.AT_high_pc, + DW.FORM_data4, + DW.AT_type, + DW.FORM_ref4, + DW.AT_name, + DW.FORM_string, + 0, 0, // table sentinel + abbrev_subprogram_retvoid, + DW.TAG_subprogram, DW.CHILDREN_yes, // header + DW.AT_low_pc, DW.FORM_addr, + DW.AT_high_pc, DW.FORM_data4, + DW.AT_name, DW.FORM_string, + 0, + 0, // table sentinel + abbrev_base_type, + DW.TAG_base_type, + DW.CHILDREN_no, // header + DW.AT_encoding, + DW.FORM_data1, + DW.AT_byte_size, + DW.FORM_data1, + DW.AT_name, + DW.FORM_string, 0, 0, // table sentinel + abbrev_pad1, DW.TAG_unspecified_type, DW.CHILDREN_no, // header + 0, 0, // table sentinel + abbrev_parameter, + DW.TAG_formal_parameter, DW.CHILDREN_no, // header + DW.AT_location, DW.FORM_exprloc, + DW.AT_type, DW.FORM_ref4, + DW.AT_name, DW.FORM_string, + 0, + 0, // table sentinel + 0, + 0, + 0, // section sentinel + }; + + const needed_size = abbrev_buf.len; + const allocated_size = dwarf_segment.allocatedSize(debug_abbrev_sect.offset); + if (needed_size > allocated_size) { + debug_abbrev_sect.size = 0; // free the space + debug_abbrev_sect.offset = @intCast(u32, dwarf_segment.findFreeSpace(needed_size, 1, null)); + } + debug_abbrev_sect.size = needed_size; + log.debug("__debug_abbrev start=0x{x} end=0x{x}", .{ + debug_abbrev_sect.offset, + debug_abbrev_sect.offset + needed_size, + }); + + const abbrev_offset = 0; + self.debug_abbrev_table_offset = abbrev_offset; + try self.file.pwriteAll(&abbrev_buf, debug_abbrev_sect.offset + abbrev_offset); + self.load_commands_dirty = true; + self.debug_abbrev_section_dirty = 
false; + } + + if (self.debug_info_header_dirty) debug_info: { + // If this value is null it means there is an error in the module; + // leave debug_info_header_dirty=true. + const first_dbg_info_decl = self.dbg_info_decl_first orelse break :debug_info; + const last_dbg_info_decl = self.dbg_info_decl_last.?; + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const debug_info_sect = &dwarf_segment.sections.items[self.debug_info_section_index.?]; + + var di_buf = std.ArrayList(u8).init(allocator); + defer di_buf.deinit(); + + // We have a function to compute the upper bound size, because it's needed + // for determining where to put the offset of the first `LinkBlock`. + try di_buf.ensureCapacity(self.dbgInfoNeededHeaderBytes()); + + // initial length - length of the .debug_info contribution for this compilation unit, + // not including the initial length itself. + // We have to come back and write it later after we know the size. + const after_init_len = di_buf.items.len + init_len_size; + // +1 for the final 0 that ends the compilation unit children. + const dbg_info_end = last_dbg_info_decl.dbg_info_off + last_dbg_info_decl.dbg_info_len + 1; + const init_len = dbg_info_end - after_init_len; + di_buf.appendNTimesAssumeCapacity(0xff, 4); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), init_len); + mem.writeIntLittle(u16, di_buf.addManyAsArrayAssumeCapacity(2), 4); // DWARF version + const abbrev_offset = self.debug_abbrev_table_offset.?; + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), abbrev_offset); + di_buf.appendAssumeCapacity(8); // address size + // Write the form for the compile unit, which must match the abbrev table above. 
+ const name_strp = try self.makeDebugString(allocator, module.root_pkg.root_src_path); + const comp_dir_strp = try self.makeDebugString(allocator, module.root_pkg.root_src_directory.path orelse "."); + const producer_strp = try self.makeDebugString(allocator, link.producer_string); + // Currently only one compilation unit is supported, so the address range is simply + // identical to the main program header virtual address and memory size. + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_section = text_segment.sections.items[self.text_section_index.?]; + const low_pc = text_section.addr; + const high_pc = text_section.addr + text_section.size; + + di_buf.appendAssumeCapacity(abbrev_compile_unit); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), 0); // DW.AT_stmt_list, DW.FORM_sec_offset + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), low_pc); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), high_pc); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), name_strp); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), comp_dir_strp); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), producer_strp); + // We are still waiting on dwarf-std.org to assign DW_LANG_Zig a number: + // http://dwarfstd.org/ShowIssue.php?issue=171115.1 + // Until then we say it is C99. + mem.writeIntLittle(u16, di_buf.addManyAsArrayAssumeCapacity(2), DW.LANG_C99); + + if (di_buf.items.len > first_dbg_info_decl.dbg_info_off) { + // Move the first N decls to the end to make more padding for the header. 
+ @panic("TODO: handle __zdebug_info header exceeding its padding"); + } + const jmp_amt = first_dbg_info_decl.dbg_info_off - di_buf.items.len; + try self.pwriteDbgInfoNops(0, di_buf.items, jmp_amt, false, debug_info_sect.offset); + self.debug_info_header_dirty = false; + } + + if (self.debug_aranges_section_dirty) { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const debug_aranges_sect = &dwarf_segment.sections.items[self.debug_aranges_section_index.?]; + const debug_info_sect = dwarf_segment.sections.items[self.debug_info_section_index.?]; + + var di_buf = std.ArrayList(u8).init(allocator); + defer di_buf.deinit(); + + // Enough for all the data without resizing. When support for more compilation units + // is added, the size of this section will become more variable. + try di_buf.ensureCapacity(100); + + // initial length - length of the .debug_aranges contribution for this compilation unit, + // not including the initial length itself. + // We have to come back and write it later after we know the size. + const init_len_index = di_buf.items.len; + di_buf.items.len += init_len_size; + const after_init_len = di_buf.items.len; + mem.writeIntLittle(u16, di_buf.addManyAsArrayAssumeCapacity(2), 2); // version + // When more than one compilation unit is supported, this will be the offset to it. + // For now it is always at offset 0 in .debug_info. 
+ mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), debug_info_sect.addr); // __debug_info offset + di_buf.appendAssumeCapacity(@sizeOf(u64)); // address_size + di_buf.appendAssumeCapacity(0); // segment_selector_size + + const end_header_offset = di_buf.items.len; + const begin_entries_offset = mem.alignForward(end_header_offset, @sizeOf(u64) * 2); + di_buf.appendNTimesAssumeCapacity(0, begin_entries_offset - end_header_offset); + + // Currently only one compilation unit is supported, so the address range is simply + // identical to the main program header virtual address and memory size. + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_section = text_segment.sections.items[self.text_section_index.?]; + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), text_section.addr); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), text_section.size); + + // Sentinel. + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), 0); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), 0); + + // Go back and populate the initial length. + const init_len = di_buf.items.len - after_init_len; + // initial length - length of the .debug_aranges contribution for this compilation unit, + // not including the initial length itself. 
+ di_buf.items[init_len_index..][0..4].* = [_]u8{ 0xff, 0xff, 0xff, 0xff }; + mem.writeIntLittle(u64, di_buf.items[init_len_index + 4 ..][0..8], init_len); + + const needed_size = di_buf.items.len; + const allocated_size = dwarf_segment.allocatedSize(debug_aranges_sect.offset); + if (needed_size > allocated_size) { + debug_aranges_sect.size = 0; // free the space + const offset = dwarf_segment.findFreeSpace(needed_size, 16, null); + debug_aranges_sect.offset = @intCast(u32, offset); + debug_aranges_sect.addr = dwarf_segment.inner.vmaddr + offset - dwarf_segment.inner.fileoff; + } + debug_aranges_sect.size = needed_size; + log.debug("__debug_aranges start=0x{x} end=0x{x}", .{ + debug_aranges_sect.offset, + debug_aranges_sect.offset + needed_size, + }); + + try self.file.pwriteAll(di_buf.items, debug_aranges_sect.offset); + self.load_commands_dirty = true; + self.debug_aranges_section_dirty = false; + } + if (self.debug_line_header_dirty) debug_line: { + if (self.dbg_line_fn_first == null) { + break :debug_line; // Error in module; leave debug_line_header_dirty=true. + } + const dbg_line_prg_off = self.getDebugLineProgramOff(); + const dbg_line_prg_end = self.getDebugLineProgramEnd(); + assert(dbg_line_prg_end != 0); + + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const debug_line_sect = &dwarf_segment.sections.items[self.debug_line_section_index.?]; + + var di_buf = std.ArrayList(u8).init(allocator); + defer di_buf.deinit(); + + // The size of this header is variable, depending on the number of directories, + // files, and padding. We have a function to compute the upper bound size, however, + // because it's needed for determining where to put the offset of the first `SrcFn`. + try di_buf.ensureCapacity(self.dbgLineNeededHeaderBytes(module)); + + // initial length - length of the .debug_line contribution for this compilation unit, + // not including the initial length itself. 
+ const after_init_len = di_buf.items.len + init_len_size; + const init_len = dbg_line_prg_end - after_init_len; + di_buf.appendNTimesAssumeCapacity(0xff, 4); + mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), init_len); + mem.writeIntLittle(u16, di_buf.addManyAsArrayAssumeCapacity(2), 4); // version + + // Empirically, debug info consumers do not respect this field, or otherwise + // consider it to be an error when it does not point exactly to the end of the header. + // Therefore we rely on the NOP jump at the beginning of the Line Number Program for + // padding rather than this field. + const before_header_len = di_buf.items.len; + di_buf.items.len += @sizeOf(u64); // We will come back and write this. + const after_header_len = di_buf.items.len; + + const opcode_base = DW.LNS_set_isa + 1; + di_buf.appendSliceAssumeCapacity(&[_]u8{ + 1, // minimum_instruction_length + 1, // maximum_operations_per_instruction + 1, // default_is_stmt + 1, // line_base (signed) + 1, // line_range + opcode_base, + + // Standard opcode lengths. The number of items here is based on `opcode_base`. + // The value is the number of LEB128 operands the instruction takes. 
+ 0, // `DW.LNS_copy` + 1, // `DW.LNS_advance_pc` + 1, // `DW.LNS_advance_line` + 1, // `DW.LNS_set_file` + 1, // `DW.LNS_set_column` + 0, // `DW.LNS_negate_stmt` + 0, // `DW.LNS_set_basic_block` + 0, // `DW.LNS_const_add_pc` + 1, // `DW.LNS_fixed_advance_pc` + 0, // `DW.LNS_set_prologue_end` + 0, // `DW.LNS_set_epilogue_begin` + 1, // `DW.LNS_set_isa` + 0, // include_directories (none except the compilation unit cwd) + }); + // file_names[0] + di_buf.appendSliceAssumeCapacity(module.root_pkg.root_src_path); // relative path name + di_buf.appendSliceAssumeCapacity(&[_]u8{ + 0, // null byte for the relative path name + 0, // directory_index + 0, // mtime (TODO supply this) + 0, // file size bytes (TODO supply this) + 0, // file_names sentinel + }); + + const header_len = di_buf.items.len - after_header_len; + mem.writeIntLittle(u64, di_buf.items[before_header_len..][0..8], header_len); + + // We use NOPs because consumers empirically do not respect the header length field. + if (di_buf.items.len > dbg_line_prg_off) { + // Move the first N files to the end to make more padding for the header. 
+ @panic("TODO: handle __debug_line header exceeding its padding"); + } + const jmp_amt = dbg_line_prg_off - di_buf.items.len; + try self.pwriteDbgLineNops(0, di_buf.items, jmp_amt, debug_line_sect.offset); + self.debug_line_header_dirty = false; + } + { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const debug_strtab_sect = &dwarf_segment.sections.items[self.debug_str_section_index.?]; + if (self.debug_string_table_dirty or self.debug_string_table.items.len != debug_strtab_sect.size) { + const allocated_size = dwarf_segment.allocatedSize(debug_strtab_sect.offset); + const needed_size = self.debug_string_table.items.len; + + if (needed_size > allocated_size) { + debug_strtab_sect.size = 0; // free the space + const new_offset = dwarf_segment.findFreeSpace(needed_size, 1, null); + debug_strtab_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_strtab_sect.offset = @intCast(u32, new_offset); + } + debug_strtab_sect.size = @intCast(u32, needed_size); + + log.debug("__debug_strtab start=0x{x} end=0x{x}", .{ + debug_strtab_sect.offset, + debug_strtab_sect.offset + needed_size, + }); + + try self.file.pwriteAll(self.debug_string_table.items, debug_strtab_sect.offset); + self.load_commands_dirty = true; + self.debug_string_table_dirty = false; + } + } + try self.writeStringTable(); + + { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + var file_size: u64 = 0; + for (dwarf_segment.sections.items) |sect| { + file_size += sect.size; + } + dwarf_segment.inner.filesize = file_size; + } + try self.writeLoadCommands(allocator); try self.writeHeader(); assert(!self.header_dirty); assert(!self.load_commands_dirty); assert(!self.string_table_dirty); + assert(!self.debug_abbrev_section_dirty); + assert(!self.debug_aranges_section_dirty); + assert(!self.debug_string_table_dirty); } pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { + 
self.dbg_info_decl_free_list.deinit(allocator); + self.dbg_line_fn_free_list.deinit(allocator); + self.debug_string_table.deinit(allocator); for (self.load_commands.items) |*lc| { lc.deinit(allocator); } @@ -535,3 +885,367 @@ pub fn writeStringTable(self: *DebugSymbols) !void { self.load_commands_dirty = true; self.string_table_dirty = false; } + +/// Asserts the type has codegen bits. +pub fn addDbgInfoType( + self: *DebugSymbols, + ty: Type, + dbg_info_buffer: *std.ArrayList(u8), + target: std.Target, +) !void { + switch (ty.zigTypeTag()) { + .Void => unreachable, + .NoReturn => unreachable, + .Bool => { + try dbg_info_buffer.appendSlice(&[_]u8{ + abbrev_base_type, + DW.ATE_boolean, // DW.AT_encoding , DW.FORM_data1 + 1, // DW.AT_byte_size, DW.FORM_data1 + 'b', + 'o', + 'o', + 'l', + 0, // DW.AT_name, DW.FORM_string + }); + }, + .Int => { + const info = ty.intInfo(target); + try dbg_info_buffer.ensureCapacity(dbg_info_buffer.items.len + 12); + dbg_info_buffer.appendAssumeCapacity(abbrev_base_type); + // DW.AT_encoding, DW.FORM_data1 + dbg_info_buffer.appendAssumeCapacity(switch (info.signedness) { + .signed => DW.ATE_signed, + .unsigned => DW.ATE_unsigned, + }); + // DW.AT_byte_size, DW.FORM_data1 + dbg_info_buffer.appendAssumeCapacity(@intCast(u8, ty.abiSize(target))); + // DW.AT_name, DW.FORM_string + try dbg_info_buffer.writer().print("{}\x00", .{ty}); + }, + else => { + std.log.scoped(.compiler).err("TODO implement .debug_info for type '{}'", .{ty}); + try dbg_info_buffer.append(abbrev_pad1); + }, + } +} + +pub fn updateDeclDebugInfoAllocation( + self: *DebugSymbols, + allocator: *Allocator, + text_block: *TextBlock, + len: u32, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + // This logic is nearly identical to the logic above in `updateDecl` for + // `SrcFn` and the line number programs. If you are editing this logic, you + // probably need to edit that logic too. 
+ + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const debug_info_sect = &dwarf_segment.sections.items[self.debug_info_section_index.?]; + text_block.dbg_info_len = len; + if (self.dbg_info_decl_last) |last| { + if (text_block.dbg_info_next) |next| { + // Update existing Decl - non-last item. + if (text_block.dbg_info_off + text_block.dbg_info_len + min_nop_size > next.dbg_info_off) { + // It grew too big, so we move it to a new location. + if (text_block.dbg_info_prev) |prev| { + _ = self.dbg_info_decl_free_list.put(allocator, prev, {}) catch {}; + prev.dbg_info_next = text_block.dbg_info_next; + } + next.dbg_info_prev = text_block.dbg_info_prev; + text_block.dbg_info_next = null; + // Populate where it used to be with NOPs. + const file_pos = debug_info_sect.offset + text_block.dbg_info_off; + try self.pwriteDbgInfoNops(0, &[0]u8{}, text_block.dbg_info_len, false, file_pos); + // TODO Look at the free list before appending at the end. + text_block.dbg_info_prev = last; + last.dbg_info_next = text_block; + self.dbg_info_decl_last = text_block; + + text_block.dbg_info_off = last.dbg_info_off + (last.dbg_info_len * alloc_num / alloc_den); + } + } else if (text_block.dbg_info_prev == null) { + // Append new Decl. + // TODO Look at the free list before appending at the end. 
+ text_block.dbg_info_prev = last; + last.dbg_info_next = text_block; + self.dbg_info_decl_last = text_block; + + text_block.dbg_info_off = last.dbg_info_off + (last.dbg_info_len * alloc_num / alloc_den); + } + } else { + // This is the first Decl of the .debug_info + self.dbg_info_decl_first = text_block; + self.dbg_info_decl_last = text_block; + + text_block.dbg_info_off = self.dbgInfoNeededHeaderBytes() * alloc_num / alloc_den; + } +} + +pub fn writeDeclDebugInfo(self: *DebugSymbols, text_block: *TextBlock, dbg_info_buf: []const u8) !void { + const tracy = trace(@src()); + defer tracy.end(); + + // This logic is nearly identical to the logic above in `updateDecl` for + // `SrcFn` and the line number programs. If you are editing this logic, you + // probably need to edit that logic too. + + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const debug_info_sect = &dwarf_segment.sections.items[self.debug_info_section_index.?]; + + const last_decl = self.dbg_info_decl_last.?; + // +1 for a trailing zero to end the children of the decl tag. 
+ const needed_size = last_decl.dbg_info_off + last_decl.dbg_info_len + 1; + if (needed_size != debug_info_sect.size) { + if (needed_size > dwarf_segment.allocatedSize(debug_info_sect.offset)) { + const new_offset = dwarf_segment.findFreeSpace(needed_size, 1, null); + const existing_size = last_decl.dbg_info_off; + + // TODO + assert(dwarf_segment.inner.fileoff + dwarf_segment.inner.filesize >= new_offset + needed_size); + + log.debug("moving _debug_info section: {} bytes from 0x{x} to 0x{x}", .{ + existing_size, + debug_info_sect.offset, + new_offset, + }); + + const amt = try self.file.copyRangeAll(debug_info_sect.offset, self.file, new_offset, existing_size); + if (amt != existing_size) return error.InputOutput; + debug_info_sect.offset = @intCast(u32, new_offset); + debug_info_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + } + debug_info_sect.size = needed_size; + self.load_commands_dirty = true; // TODO look into making only the one section dirty + self.debug_info_header_dirty = true; + } + const prev_padding_size: u32 = if (text_block.dbg_info_prev) |prev| + text_block.dbg_info_off - (prev.dbg_info_off + prev.dbg_info_len) + else + 0; + const next_padding_size: u32 = if (text_block.dbg_info_next) |next| + next.dbg_info_off - (text_block.dbg_info_off + text_block.dbg_info_len) + else + 0; + + // To end the children of the decl tag. + const trailing_zero = text_block.dbg_info_next == null; + + // We only have support for one compilation unit so far, so the offsets are directly + // from the .debug_info section. 
+ const file_pos = debug_info_sect.offset + text_block.dbg_info_off; + try self.pwriteDbgInfoNops(prev_padding_size, dbg_info_buf, next_padding_size, trailing_zero, file_pos); +} + +fn getDebugLineProgramOff(self: DebugSymbols) u32 { + return self.dbg_line_fn_first.?.off; +} + +fn getDebugLineProgramEnd(self: DebugSymbols) u32 { + return self.dbg_line_fn_last.?.off + self.dbg_line_fn_last.?.len; +} + +/// TODO Improve this to use a table. +fn makeDebugString(self: *DebugSymbols, allocator: *Allocator, bytes: []const u8) !u32 { + try self.debug_string_table.ensureCapacity(allocator, self.debug_string_table.items.len + bytes.len + 1); + const result = self.debug_string_table.items.len; + self.debug_string_table.appendSliceAssumeCapacity(bytes); + self.debug_string_table.appendAssumeCapacity(0); + return @intCast(u32, result); +} + +/// The reloc offset for the virtual address of a function in its Line Number Program. +/// Size is a virtual address integer. +pub const dbg_line_vaddr_reloc_index = 3; +/// The reloc offset for the virtual address of a function in its .debug_info TAG_subprogram. +/// Size is a virtual address integer. +pub const dbg_info_low_pc_reloc_index = 1; + +/// The reloc offset for the line offset of a function from the previous function's line. +/// It's a fixed-size 4-byte ULEB128. +pub fn getRelocDbgLineOff() usize { + return dbg_line_vaddr_reloc_index + @sizeOf(u64) + 1; +} + +pub fn getRelocDbgFileIndex() usize { + return getRelocDbgLineOff() + 5; +} + +pub fn getRelocDbgInfoSubprogramHighPC() u32 { + return dbg_info_low_pc_reloc_index + @sizeOf(u64); +} + +pub fn dbgLineNeededHeaderBytes(self: DebugSymbols, module: *Module) u32 { + const directory_entry_format_count = 1; + const file_name_entry_format_count = 1; + const directory_count = 1; + const file_name_count = 1; + const root_src_dir_path_len = if (module.root_pkg.root_src_directory.path) |p| p.len else 1; // "." 
+ return @intCast(u32, 53 + directory_entry_format_count * 2 + file_name_entry_format_count * 2 + + directory_count * 8 + file_name_count * 8 + + // These are encoded as DW.FORM_string rather than DW.FORM_strp as we would like + // because of a workaround for readelf and gdb failing to understand DWARFv5 correctly. + root_src_dir_path_len + + module.root_pkg.root_src_path.len); +} + +fn dbgInfoNeededHeaderBytes(self: DebugSymbols) u32 { + return 120; +} + +pub const min_nop_size = 2; + +/// Writes to the file a buffer, prefixed and suffixed by the specified number of +/// bytes of NOPs. Asserts each padding size is at least `min_nop_size` and total padding bytes +/// are less than 126,976 bytes (if this limit is ever reached, this function can be +/// improved to make more than one pwritev call, or the limit can be raised by a fixed +/// amount by increasing the length of `vecs`). +pub fn pwriteDbgLineNops( + self: *DebugSymbols, + prev_padding_size: usize, + buf: []const u8, + next_padding_size: usize, + offset: u64, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const page_of_nops = [1]u8{DW.LNS_negate_stmt} ** 4096; + const three_byte_nop = [3]u8{ DW.LNS_advance_pc, 0b1000_0000, 0 }; + var vecs: [32]std.os.iovec_const = undefined; + var vec_index: usize = 0; + { + var padding_left = prev_padding_size; + if (padding_left % 2 != 0) { + vecs[vec_index] = .{ + .iov_base = &three_byte_nop, + .iov_len = three_byte_nop.len, + }; + vec_index += 1; + padding_left -= three_byte_nop.len; + } + while (padding_left > page_of_nops.len) { + vecs[vec_index] = .{ + .iov_base = &page_of_nops, + .iov_len = page_of_nops.len, + }; + vec_index += 1; + padding_left -= page_of_nops.len; + } + if (padding_left > 0) { + vecs[vec_index] = .{ + .iov_base = &page_of_nops, + .iov_len = padding_left, + }; + vec_index += 1; + } + } + + vecs[vec_index] = .{ + .iov_base = buf.ptr, + .iov_len = buf.len, + }; + vec_index += 1; + + { + var padding_left = next_padding_size; + if 
(padding_left % 2 != 0) { + vecs[vec_index] = .{ + .iov_base = &three_byte_nop, + .iov_len = three_byte_nop.len, + }; + vec_index += 1; + padding_left -= three_byte_nop.len; + } + while (padding_left > page_of_nops.len) { + vecs[vec_index] = .{ + .iov_base = &page_of_nops, + .iov_len = page_of_nops.len, + }; + vec_index += 1; + padding_left -= page_of_nops.len; + } + if (padding_left > 0) { + vecs[vec_index] = .{ + .iov_base = &page_of_nops, + .iov_len = padding_left, + }; + vec_index += 1; + } + } + try self.file.pwritevAll(vecs[0..vec_index], offset - prev_padding_size); +} + +/// Writes to the file a buffer, prefixed and suffixed by the specified number of +/// bytes of padding. +pub fn pwriteDbgInfoNops( + self: *DebugSymbols, + prev_padding_size: usize, + buf: []const u8, + next_padding_size: usize, + trailing_zero: bool, + offset: u64, +) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const page_of_nops = [1]u8{abbrev_pad1} ** 4096; + var vecs: [32]std.os.iovec_const = undefined; + var vec_index: usize = 0; + { + var padding_left = prev_padding_size; + while (padding_left > page_of_nops.len) { + vecs[vec_index] = .{ + .iov_base = &page_of_nops, + .iov_len = page_of_nops.len, + }; + vec_index += 1; + padding_left -= page_of_nops.len; + } + if (padding_left > 0) { + vecs[vec_index] = .{ + .iov_base = &page_of_nops, + .iov_len = padding_left, + }; + vec_index += 1; + } + } + + vecs[vec_index] = .{ + .iov_base = buf.ptr, + .iov_len = buf.len, + }; + vec_index += 1; + + { + var padding_left = next_padding_size; + while (padding_left > page_of_nops.len) { + vecs[vec_index] = .{ + .iov_base = &page_of_nops, + .iov_len = page_of_nops.len, + }; + vec_index += 1; + padding_left -= page_of_nops.len; + } + if (padding_left > 0) { + vecs[vec_index] = .{ + .iov_base = &page_of_nops, + .iov_len = padding_left, + }; + vec_index += 1; + } + } + + if (trailing_zero) { + var zbuf = [1]u8{0}; + vecs[vec_index] = .{ + .iov_base = &zbuf, + .iov_len = zbuf.len, + 
}; + vec_index += 1; + } + + try self.file.pwritevAll(vecs[0..vec_index], offset - prev_padding_size); +} diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index d0b0e76ec2..e94ca1c8e5 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -5,6 +5,7 @@ const mem = std.mem; const meta = std.meta; const macho = std.macho; const testing = std.testing; +const assert = std.debug.assert; const Allocator = std.mem.Allocator; const MachO = @import("../MachO.zig"); @@ -202,9 +203,12 @@ pub const SegmentCommand = struct { pub fn allocatedSize(self: SegmentCommand, start: u64) u64 { assert(start > 0); + if (start == self.inner.fileoff) + return 0; var min_pos: u64 = std.math.maxInt(u64); for (self.sections.items) |section| { - if (section.offset > start and section.offset < min_pos) min_pos = section.offset; + if (section.offset <= start) continue; + if (section.offset < min_pos) min_pos = section.offset; } return min_pos - start; } From 9318656ce297de09b1247fde300ad2f794d51feb Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 29 Dec 2020 23:39:58 +0100 Subject: [PATCH 10/17] macho: use 32bit DWARF format --- src/link/MachO/DebugSymbols.zig | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 349ecd9d30..faa7a00139 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -351,7 +351,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt // TODO This linker code currently assumes there is only 1 compilation unit and it corresponds to the // Zig source code. 
const module = options.module orelse return error.LinkingWithoutZigSourceUnimplemented; - const init_len_size: usize = 12; + const init_len_size: usize = 4; if (self.debug_abbrev_section_dirty) { const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; @@ -450,11 +450,10 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt // +1 for the final 0 that ends the compilation unit children. const dbg_info_end = last_dbg_info_decl.dbg_info_off + last_dbg_info_decl.dbg_info_len + 1; const init_len = dbg_info_end - after_init_len; - di_buf.appendNTimesAssumeCapacity(0xff, 4); - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), init_len); + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @intCast(u32, init_len)); mem.writeIntLittle(u16, di_buf.addManyAsArrayAssumeCapacity(2), 4); // DWARF version const abbrev_offset = self.debug_abbrev_table_offset.?; - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), abbrev_offset); + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @intCast(u32, abbrev_offset)); di_buf.appendAssumeCapacity(8); // address size // Write the form for the compile unit, which must match the abbrev table above. 
const name_strp = try self.makeDebugString(allocator, module.root_pkg.root_src_path); @@ -468,12 +467,12 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt const high_pc = text_section.addr + text_section.size; di_buf.appendAssumeCapacity(abbrev_compile_unit); - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), 0); // DW.AT_stmt_list, DW.FORM_sec_offset + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), 0); // DW.AT_stmt_list, DW.FORM_sec_offset mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), low_pc); mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), high_pc); - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), name_strp); - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), comp_dir_strp); - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), producer_strp); + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @intCast(u32, name_strp)); + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @intCast(u32, comp_dir_strp)); + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @intCast(u32, producer_strp)); // We are still waiting on dwarf-std.org to assign DW_LANG_Zig a number: // http://dwarfstd.org/ShowIssue.php?issue=171115.1 // Until then we say it is C99. @@ -509,7 +508,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt mem.writeIntLittle(u16, di_buf.addManyAsArrayAssumeCapacity(2), 2); // version // When more than one compilation unit is supported, this will be the offset to it. // For now it is always at offset 0 in .debug_info. 
- mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), debug_info_sect.addr); // __debug_info offset + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), 0); // __debug_info offset di_buf.appendAssumeCapacity(@sizeOf(u64)); // address_size di_buf.appendAssumeCapacity(0); // segment_selector_size @@ -532,8 +531,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt const init_len = di_buf.items.len - after_init_len; // initial length - length of the .debug_aranges contribution for this compilation unit, // not including the initial length itself. - di_buf.items[init_len_index..][0..4].* = [_]u8{ 0xff, 0xff, 0xff, 0xff }; - mem.writeIntLittle(u64, di_buf.items[init_len_index + 4 ..][0..8], init_len); + mem.writeIntLittle(u32, di_buf.items[init_len_index..][0..4], @intCast(u32, init_len)); const needed_size = di_buf.items.len; const allocated_size = dwarf_segment.allocatedSize(debug_aranges_sect.offset); @@ -576,8 +574,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt // not including the initial length itself. const after_init_len = di_buf.items.len + init_len_size; const init_len = dbg_line_prg_end - after_init_len; - di_buf.appendNTimesAssumeCapacity(0xff, 4); - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), init_len); + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @intCast(u32, init_len)); mem.writeIntLittle(u16, di_buf.addManyAsArrayAssumeCapacity(2), 4); // version // Empirically, debug info consumers do not respect this field, or otherwise @@ -585,7 +582,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt // Therefore we rely on the NOP jump at the beginning of the Line Number Program for // padding rather than this field. const before_header_len = di_buf.items.len; - di_buf.items.len += @sizeOf(u64); // We will come back and write this. 
+ di_buf.items.len += @sizeOf(u32); // We will come back and write this. const after_header_len = di_buf.items.len; const opcode_base = DW.LNS_set_isa + 1; @@ -624,7 +621,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt }); const header_len = di_buf.items.len - after_header_len; - mem.writeIntLittle(u64, di_buf.items[before_header_len..][0..8], header_len); + mem.writeIntLittle(u32, di_buf.items[before_header_len..][0..4], @intCast(u32, header_len)); // We use NOPs because consumers empirically do not respect the header length field. if (di_buf.items.len > dbg_line_prg_off) { From fa28f7006ddba699048c54a363244dcad49af1e2 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Dec 2020 13:37:13 +0100 Subject: [PATCH 11/17] macho: fix bundle name to .dSYM --- src/link/MachO.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 131d06df49..9840dccf4a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -299,8 +299,8 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio self.base.file = file; - // Create dSym bundle. - const d_sym_path = try fmt.allocPrint(allocator, "{}.dSym/Contents/Resources/DWARF/", .{sub_path}); + // Create dSYM bundle. 
+ const d_sym_path = try fmt.allocPrint(allocator, "{}.dSYM/Contents/Resources/DWARF/", .{sub_path}); defer allocator.free(d_sym_path); var d_sym_bundle = try options.emit.?.directory.handle.makeOpenPath(d_sym_path, .{}); defer d_sym_bundle.close(); From 364691fa1f5a1c7da024cc58b9b3138425a5e475 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Dec 2020 15:55:27 +0100 Subject: [PATCH 12/17] macho: add decl line and file info to subprogram --- src/link/MachO.zig | 4 +- src/link/MachO/DebugSymbols.zig | 67 ++++++++++++++------------------- 2 files changed, 31 insertions(+), 40 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9840dccf4a..eaca814627 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1199,7 +1199,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { // .debug_info subprogram const decl_name_with_null = decl.name[0 .. mem.lenZ(decl.name) + 1]; - try dbg_info_buffer.ensureCapacity(dbg_info_buffer.items.len + 25 + decl_name_with_null.len); + try dbg_info_buffer.ensureCapacity(dbg_info_buffer.items.len + 27 + decl_name_with_null.len); const fn_ret_type = typed_value.ty.fnReturnType(); const fn_ret_has_bits = fn_ret_type.hasCodeGenBits(); @@ -1227,6 +1227,8 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { dbg_info_buffer.items.len += 4; // DW.AT_type, DW.FORM_ref4 } dbg_info_buffer.appendSliceAssumeCapacity(decl_name_with_null); // DW.AT_name, DW.FORM_string + mem.writeIntLittle(u32, dbg_info_buffer.addManyAsArrayAssumeCapacity(4), line_off + 1); // DW.AT_decl_line, DW.FORM_data4 + dbg_info_buffer.appendAssumeCapacity(file_index); // DW.AT_decl_file, DW.FORM_data1 } else { // TODO implement .debug_info for global variables } diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index faa7a00139..b809d3a2aa 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -361,52 +361,41 @@ pub fn flushModule(self: 
*DebugSymbols, allocator: *Allocator, options: link.Opt // we can simply append these bytes. const abbrev_buf = [_]u8{ abbrev_compile_unit, DW.TAG_compile_unit, DW.CHILDREN_yes, // header - DW.AT_stmt_list, DW.FORM_sec_offset, DW.AT_low_pc, - DW.FORM_addr, DW.AT_high_pc, DW.FORM_addr, - DW.AT_name, DW.FORM_strp, DW.AT_comp_dir, - DW.FORM_strp, DW.AT_producer, DW.FORM_strp, - DW.AT_language, DW.FORM_data2, 0, - 0, // table sentinel - abbrev_subprogram, - DW.TAG_subprogram, - DW.CHILDREN_yes, // header - DW.AT_low_pc, - DW.FORM_addr, - DW.AT_high_pc, - DW.FORM_data4, - DW.AT_type, - DW.FORM_ref4, - DW.AT_name, - DW.FORM_string, + DW.AT_stmt_list, DW.FORM_sec_offset, // offset + DW.AT_low_pc, DW.FORM_addr, + DW.AT_high_pc, DW.FORM_addr, + DW.AT_name, DW.FORM_strp, + DW.AT_comp_dir, DW.FORM_strp, + DW.AT_producer, DW.FORM_strp, + DW.AT_language, DW.FORM_data2, + 0, 0, // table sentinel + abbrev_subprogram, DW.TAG_subprogram, DW.CHILDREN_yes, // header + DW.AT_low_pc, DW.FORM_addr, // start VM address + DW.AT_high_pc, DW.FORM_data4, + DW.AT_type, DW.FORM_ref4, + DW.AT_name, DW.FORM_string, + DW.AT_decl_line, DW.FORM_data4, + DW.AT_decl_file, DW.FORM_data1, 0, 0, // table sentinel abbrev_subprogram_retvoid, DW.TAG_subprogram, DW.CHILDREN_yes, // header DW.AT_low_pc, DW.FORM_addr, DW.AT_high_pc, DW.FORM_data4, DW.AT_name, DW.FORM_string, - 0, - 0, // table sentinel - abbrev_base_type, - DW.TAG_base_type, - DW.CHILDREN_no, // header - DW.AT_encoding, - DW.FORM_data1, - DW.AT_byte_size, - DW.FORM_data1, - DW.AT_name, - DW.FORM_string, 0, 0, // table sentinel + DW.AT_decl_line, DW.FORM_data4, + DW.AT_decl_file, DW.FORM_data1, + 0, 0, // table sentinel + abbrev_base_type, DW.TAG_base_type, DW.CHILDREN_no, // header + DW.AT_encoding, DW.FORM_data1, DW.AT_byte_size, + DW.FORM_data1, DW.AT_name, DW.FORM_string, + 0, 0, // table sentinel abbrev_pad1, DW.TAG_unspecified_type, DW.CHILDREN_no, // header - 0, 0, // table sentinel - abbrev_parameter, - DW.TAG_formal_parameter, 
DW.CHILDREN_no, // header - DW.AT_location, DW.FORM_exprloc, - DW.AT_type, DW.FORM_ref4, - DW.AT_name, DW.FORM_string, - 0, - 0, // table sentinel - 0, - 0, - 0, // section sentinel + 0, 0, // table sentinel + abbrev_parameter, DW.TAG_formal_parameter, DW.CHILDREN_no, // header + DW.AT_location, DW.FORM_exprloc, DW.AT_type, + DW.FORM_ref4, DW.AT_name, DW.FORM_string, + 0, 0, // table sentinel + 0, 0, 0, // section sentinel }; const needed_size = abbrev_buf.len; From 60b3c4ae3ce1359779745392d4fb169fdff158d4 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Dec 2020 21:34:49 +0100 Subject: [PATCH 13/17] macho: refactor and fix stage2 tests --- src/link/MachO.zig | 21 +++++++------ src/link/MachO/DebugSymbols.zig | 56 ++++++++++++++++++--------------- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index eaca814627..c1ee052d6d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -329,10 +329,12 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio } try self.populateMissingMetadata(); - try self.d_sym.?.populateMissingMetadata(allocator); - try self.writeLocalSymbol(0); - try self.d_sym.?.writeLocalSymbol(0); + + if (self.d_sym) |*ds| { + try ds.populateMissingMetadata(allocator); + try ds.writeLocalSymbol(0); + } return self; } @@ -1276,7 +1278,8 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { symbol.n_desc = 0; try self.writeLocalSymbol(decl.link.macho.local_sym_index); - try self.d_sym.?.writeLocalSymbol(decl.link.macho.local_sym_index); + if (self.d_sym) |*ds| + try ds.writeLocalSymbol(decl.link.macho.local_sym_index); } else { const decl_name = mem.spanZ(decl.name); const name_str_index = try self.makeString(decl_name); @@ -1294,7 +1297,8 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { self.offset_table.items[decl.link.macho.offset_table_index] = addr; try 
self.writeLocalSymbol(decl.link.macho.local_sym_index); - try self.d_sym.?.writeLocalSymbol(decl.link.macho.local_sym_index); + if (self.d_sym) |*ds| + try ds.writeLocalSymbol(decl.link.macho.local_sym_index); try self.writeOffsetTableEntry(decl.link.macho.offset_table_index); } @@ -1398,9 +1402,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const new_offset = dwarf_segment.findFreeSpace(needed_size, 1, null); const existing_size = last_src_fn.off; - assert(dwarf_segment.inner.fileoff + dwarf_segment.inner.filesize >= new_offset + needed_size); - - log.debug("moving __zdebug_line section: {} bytes from 0x{x} to 0x{x}", .{ + log.debug("moving __debug_line section: {} bytes from 0x{x} to 0x{x}", .{ existing_size, debug_line_sect.offset, new_offset, @@ -2097,7 +2099,8 @@ fn makeString(self: *MachO, bytes: []const u8) !u32 { self.string_table.appendSliceAssumeCapacity(bytes); self.string_table.appendAssumeCapacity(0); self.string_table_dirty = true; - self.d_sym.?.string_table_dirty = true; + if (self.d_sym) |*ds| + ds.string_table_dirty = true; return @intCast(u32, result); } diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index b809d3a2aa..3e5053af0b 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -94,6 +94,15 @@ pub const abbrev_base_type = 4; pub const abbrev_pad1 = 5; pub const abbrev_parameter = 6; +/// The reloc offset for the virtual address of a function in its Line Number Program. +/// Size is a virtual address integer. +pub const dbg_line_vaddr_reloc_index = 3; +/// The reloc offset for the virtual address of a function in its .debug_info TAG_subprogram. +/// Size is a virtual address integer. +pub const dbg_info_low_pc_reloc_index = 1; + +pub const min_nop_size = 2; + /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void { @@ -469,7 +478,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt if (di_buf.items.len > first_dbg_info_decl.dbg_info_off) { // Move the first N decls to the end to make more padding for the header. - @panic("TODO: handle __zdebug_info header exceeding its padding"); + @panic("TODO: handle __debug_info header exceeding its padding"); } const jmp_amt = first_dbg_info_decl.dbg_info_off - di_buf.items.len; try self.pwriteDbgInfoNops(0, di_buf.items, jmp_amt, false, debug_info_sect.offset); @@ -648,16 +657,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt } try self.writeStringTable(); - - { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - var file_size: u64 = 0; - for (dwarf_segment.sections.items) |sect| { - file_size += sect.size; - } - dwarf_segment.inner.filesize = file_size; - } - + self.updateDwarfSegment(); try self.writeLoadCommands(allocator); try self.writeHeader(); @@ -676,6 +676,7 @@ pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { for (self.load_commands.items) |*lc| { lc.deinit(allocator); } + self.load_commands.deinit(allocator); self.file.close(); } @@ -724,6 +725,21 @@ fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: Segm return cmd; } +fn updateDwarfSegment(self: *DebugSymbols) void { + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + var file_size: u64 = 0; + for (dwarf_segment.sections.items) |sect| { + file_size += sect.size; + } + if (file_size != dwarf_segment.inner.filesize) { + dwarf_segment.inner.filesize = file_size; + if (dwarf_segment.inner.vmsize < dwarf_segment.inner.filesize) { + dwarf_segment.inner.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.inner.filesize, page_size); + } + self.load_commands_dirty = true; + } +} + /// Writes all load commands and 
section headers. fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void { if (!self.load_commands_dirty) return; @@ -823,7 +839,7 @@ fn relocateSymbolTable(self: *DebugSymbols) !void { const new_symoff = self.findFreeSpaceLinkedit(needed_size, @alignOf(macho.nlist_64)); const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); - assert(new_symoff + existing_size <= self.linkedit_off + self.linkedit_size); + assert(new_symoff + existing_size <= self.linkedit_off + self.linkedit_size); // TODO expand LINKEDIT segment. log.debug("relocating dSym symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ symtab.symoff, symtab.symoff + existing_size, @@ -850,7 +866,7 @@ pub fn writeLocalSymbol(self: *DebugSymbols, index: usize) !void { try self.file.pwriteAll(mem.asBytes(&self.base.local_symbols.items[index]), off); } -pub fn writeStringTable(self: *DebugSymbols) !void { +fn writeStringTable(self: *DebugSymbols) !void { if (!self.string_table_dirty) return; const tracy = trace(@src()); @@ -989,10 +1005,7 @@ pub fn writeDeclDebugInfo(self: *DebugSymbols, text_block: *TextBlock, dbg_info_ const new_offset = dwarf_segment.findFreeSpace(needed_size, 1, null); const existing_size = last_decl.dbg_info_off; - // TODO - assert(dwarf_segment.inner.fileoff + dwarf_segment.inner.filesize >= new_offset + needed_size); - - log.debug("moving _debug_info section: {} bytes from 0x{x} to 0x{x}", .{ + log.debug("moving __debug_info section: {} bytes from 0x{x} to 0x{x}", .{ existing_size, debug_info_sect.offset, new_offset, @@ -1042,13 +1055,6 @@ fn makeDebugString(self: *DebugSymbols, allocator: *Allocator, bytes: []const u8 return @intCast(u32, result); } -/// The reloc offset for the virtual address of a function in its Line Number Program. -/// Size is a virtual address integer. -pub const dbg_line_vaddr_reloc_index = 3; -/// The reloc offset for the virtual address of a function in its .debug_info TAG_subprogram. -/// Size is a virtual address integer. 
-pub const dbg_info_low_pc_reloc_index = 1; - /// The reloc offset for the line offset of a function from the previous function's line. /// It's a fixed-size 4-byte ULEB128. pub fn getRelocDbgLineOff() usize { @@ -1081,8 +1087,6 @@ fn dbgInfoNeededHeaderBytes(self: DebugSymbols) u32 { return 120; } -pub const min_nop_size = 2; - /// Writes to the file a buffer, prefixed and suffixed by the specified number of /// bytes of NOPs. Asserts each padding size is at least `min_nop_size` and total padding bytes /// are less than 126,976 bytes (if this limit is ever reached, this function can be From d189614647d6c4195081e40f08f92e15db153592 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Dec 2020 21:49:06 +0100 Subject: [PATCH 14/17] macho: move updateDeclLineNumber logic to DebugSymbols --- src/link/MachO.zig | 22 +++------------------- src/link/MachO/DebugSymbols.zig | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c1ee052d6d..4cb0b016bc 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1460,25 +1460,9 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { } pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const container_scope = decl.scope.cast(Module.Scope.Container).?; - const tree = container_scope.file_scope.contents.tree; - const file_ast_decls = tree.root_node.decls(); - // TODO Look into improving the performance here by adding a token-index-to-line - // lookup table. Currently this involves scanning over the source code for newlines. 
- const fn_proto = file_ast_decls[decl.src_index].castTag(.FnProto).?; - const block = fn_proto.getBodyNode().?.castTag(.Block).?; - const line_delta = std.zig.lineDelta(tree.source, 0, tree.token_locs[block.lbrace].start); - const casted_line_off = @intCast(u28, line_delta); - - const dwarf_segment = &self.d_sym.?.load_commands.items[self.d_sym.?.dwarf_segment_cmd_index.?].Segment; - const shdr = &dwarf_segment.sections.items[self.d_sym.?.debug_line_section_index.?]; - const file_pos = shdr.offset + decl.fn_link.macho.off + DebugSymbols.getRelocDbgLineOff(); - var data: [4]u8 = undefined; - leb.writeUnsignedFixed(4, &data, casted_line_off); - try self.d_sym.?.file.pwriteAll(&data, file_pos); + if (self.d_sym) |*ds| { + try ds.updateDeclLineNumber(module, decl); + } } pub fn updateDeclExports( diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 3e5053af0b..a6ac2dfde6 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -1239,3 +1239,25 @@ pub fn pwriteDbgInfoNops( try self.file.pwritevAll(vecs[0..vec_index], offset - prev_padding_size); } + +pub fn updateDeclLineNumber(self: *DebugSymbols, module: *Module, decl: *const Module.Decl) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const container_scope = decl.scope.cast(Module.Scope.Container).?; + const tree = container_scope.file_scope.contents.tree; + const file_ast_decls = tree.root_node.decls(); + // TODO Look into improving the performance here by adding a token-index-to-line + // lookup table. Currently this involves scanning over the source code for newlines. 
+ const fn_proto = file_ast_decls[decl.src_index].castTag(.FnProto).?; + const block = fn_proto.getBodyNode().?.castTag(.Block).?; + const line_delta = std.zig.lineDelta(tree.source, 0, tree.token_locs[block.lbrace].start); + const casted_line_off = @intCast(u28, line_delta); + + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const shdr = &dwarf_segment.sections.items[self.debug_line_section_index.?]; + const file_pos = shdr.offset + decl.fn_link.macho.off + getRelocDbgLineOff(); + var data: [4]u8 = undefined; + leb.writeUnsignedFixed(4, &data, casted_line_off); + try self.file.pwriteAll(&data, file_pos); +} From ea4ff34e13d9768eedcc9add608be2366d6ad030 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 30 Dec 2020 22:18:21 +0100 Subject: [PATCH 15/17] macho: put all DWARF-related logic into DebugSymbols --- src/link/MachO.zig | 270 +++--------------------- src/link/MachO/DebugSymbols.zig | 355 ++++++++++++++++++++++++++++---- 2 files changed, 342 insertions(+), 283 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4cb0b016bc..2be7cb5199 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -11,8 +11,6 @@ const codegen = @import("../codegen.zig"); const aarch64 = @import("../codegen/aarch64.zig"); const math = std.math; const mem = std.mem; -const DW = std.dwarf; -const leb = std.leb; const trace = @import("../tracy.zig").trace; const build_options = @import("build_options"); @@ -1119,128 +1117,30 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); - var dbg_line_buffer = std.ArrayList(u8).init(self.base.allocator); - defer dbg_line_buffer.deinit(); - - var dbg_info_buffer = std.ArrayList(u8).init(self.base.allocator); - defer dbg_info_buffer.deinit(); - - var dbg_info_type_relocs: File.DbgInfoTypeRelocsTable = .{}; + var debug_buffers = if (self.d_sym) |*ds| try 
ds.initDeclDebugBuffers(self.base.allocator, module, decl) else null; defer { - var it = dbg_info_type_relocs.iterator(); - while (it.next()) |entry| { - entry.value.relocs.deinit(self.base.allocator); + if (debug_buffers) |*dbg| { + dbg.dbg_line_buffer.deinit(); + dbg.dbg_info_buffer.deinit(); + var it = dbg.dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { + entry.value.relocs.deinit(self.base.allocator); + } + dbg.dbg_info_type_relocs.deinit(self.base.allocator); } - dbg_info_type_relocs.deinit(self.base.allocator); } const typed_value = decl.typed_value.most_recent.typed_value; - const is_fn: bool = switch (typed_value.ty.zigTypeTag()) { - .Fn => true, - else => false, - }; - if (is_fn) { - const zir_dumps = if (std.builtin.is_test) &[0][]const u8{} else build_options.zir_dumps; - if (zir_dumps.len != 0) { - for (zir_dumps) |fn_name| { - if (mem.eql(u8, mem.spanZ(decl.name), fn_name)) { - std.debug.print("\n{}\n", .{decl.name}); - typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); - } - } - } - - // For functions we need to add a prologue to the debug line program. - try dbg_line_buffer.ensureCapacity(26); - - const line_off: u28 = blk: { - if (decl.scope.cast(Module.Scope.Container)) |container_scope| { - const tree = container_scope.file_scope.contents.tree; - const file_ast_decls = tree.root_node.decls(); - // TODO Look into improving the performance here by adding a token-index-to-line - // lookup table. Currently this involves scanning over the source code for newlines. 
- const fn_proto = file_ast_decls[decl.src_index].castTag(.FnProto).?; - const block = fn_proto.getBodyNode().?.castTag(.Block).?; - const line_delta = std.zig.lineDelta(tree.source, 0, tree.token_locs[block.lbrace].start); - break :blk @intCast(u28, line_delta); - } else if (decl.scope.cast(Module.Scope.ZIRModule)) |zir_module| { - const byte_off = zir_module.contents.module.decls[decl.src_index].inst.src; - const line_delta = std.zig.lineDelta(zir_module.source.bytes, 0, byte_off); - break :blk @intCast(u28, line_delta); - } else { - unreachable; - } - }; - - dbg_line_buffer.appendSliceAssumeCapacity(&[_]u8{ - DW.LNS_extended_op, - @sizeOf(u64) + 1, - DW.LNE_set_address, - }); - // This is the "relocatable" vaddr, corresponding to `code_buffer` index `0`. - assert(DebugSymbols.dbg_line_vaddr_reloc_index == dbg_line_buffer.items.len); - dbg_line_buffer.items.len += @sizeOf(u64); - - dbg_line_buffer.appendAssumeCapacity(DW.LNS_advance_line); - // This is the "relocatable" relative line offset from the previous function's end curly - // to this function's begin curly. - assert(DebugSymbols.getRelocDbgLineOff() == dbg_line_buffer.items.len); - // Here we use a ULEB128-fixed-4 to make sure this field can be overwritten later. - leb.writeUnsignedFixed(4, dbg_line_buffer.addManyAsArrayAssumeCapacity(4), line_off); - - dbg_line_buffer.appendAssumeCapacity(DW.LNS_set_file); - assert(DebugSymbols.getRelocDbgFileIndex() == dbg_line_buffer.items.len); - // Once we support more than one source file, this will have the ability to be more - // than one possible value. - const file_index = 1; - leb.writeUnsignedFixed(4, dbg_line_buffer.addManyAsArrayAssumeCapacity(4), file_index); - - // Emit a line for the begin curly with prologue_end=false. The codegen will - // do the work of setting prologue_end=true and epilogue_begin=true. - dbg_line_buffer.appendAssumeCapacity(DW.LNS_copy); - - // .debug_info subprogram - const decl_name_with_null = decl.name[0 .. 
mem.lenZ(decl.name) + 1]; - try dbg_info_buffer.ensureCapacity(dbg_info_buffer.items.len + 27 + decl_name_with_null.len); - - const fn_ret_type = typed_value.ty.fnReturnType(); - const fn_ret_has_bits = fn_ret_type.hasCodeGenBits(); - if (fn_ret_has_bits) { - dbg_info_buffer.appendAssumeCapacity(DebugSymbols.abbrev_subprogram); - } else { - dbg_info_buffer.appendAssumeCapacity(DebugSymbols.abbrev_subprogram_retvoid); - } - // These get overwritten after generating the machine code. These values are - // "relocations" and have to be in this fixed place so that functions can be - // moved in virtual address space. - assert(DebugSymbols.dbg_info_low_pc_reloc_index == dbg_info_buffer.items.len); - dbg_info_buffer.items.len += @sizeOf(u64); // DW.AT_low_pc, DW.FORM_addr - assert(DebugSymbols.getRelocDbgInfoSubprogramHighPC() == dbg_info_buffer.items.len); - dbg_info_buffer.items.len += 4; // DW.AT_high_pc, DW.FORM_data4 - if (fn_ret_has_bits) { - const gop = try dbg_info_type_relocs.getOrPut(self.base.allocator, fn_ret_type); - if (!gop.found_existing) { - gop.entry.value = .{ - .off = undefined, - .relocs = .{}, - }; - } - try gop.entry.value.relocs.append(self.base.allocator, @intCast(u32, dbg_info_buffer.items.len)); - dbg_info_buffer.items.len += 4; // DW.AT_type, DW.FORM_ref4 - } - dbg_info_buffer.appendSliceAssumeCapacity(decl_name_with_null); // DW.AT_name, DW.FORM_string - mem.writeIntLittle(u32, dbg_info_buffer.addManyAsArrayAssumeCapacity(4), line_off + 1); // DW.AT_decl_line, DW.FORM_data4 - dbg_info_buffer.appendAssumeCapacity(file_index); // DW.AT_decl_file, DW.FORM_data1 - } else { - // TODO implement .debug_info for global variables - } - const res = try codegen.generateSymbol(&self.base, decl.src(), typed_value, &code_buffer, .{ - .dwarf = .{ - .dbg_line = &dbg_line_buffer, - .dbg_info = &dbg_info_buffer, - .dbg_info_type_relocs = &dbg_info_type_relocs, - }, - }); + const res = if (debug_buffers) |*dbg| + try codegen.generateSymbol(&self.base, 
decl.src(), typed_value, &code_buffer, .{ + .dwarf = .{ + .dbg_line = &dbg.dbg_line_buffer, + .dbg_info = &dbg.dbg_info_buffer, + .dbg_info_type_relocs = &dbg.dbg_info_type_relocs, + }, + }) + else + try codegen.generateSymbol(&self.base, decl.src(), typed_value, &code_buffer, .none); const code = switch (res) { .externally_managed => |x| x, @@ -1328,132 +1228,16 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const file_offset = text_section.offset + section_offset; try self.base.file.?.pwriteAll(code, file_offset); - const text_block = &decl.link.macho; - // If the Decl is a function, we need to update the __debug_line program. - if (is_fn) { - // Perform the relocations based on vaddr. - { - const ptr = dbg_line_buffer.items[DebugSymbols.dbg_line_vaddr_reloc_index..][0..8]; - mem.writeIntLittle(u64, ptr, symbol.n_value); - } - { - const ptr = dbg_info_buffer.items[DebugSymbols.dbg_info_low_pc_reloc_index..][0..8]; - mem.writeIntLittle(u64, ptr, symbol.n_value); - } - { - const ptr = dbg_info_buffer.items[DebugSymbols.getRelocDbgInfoSubprogramHighPC()..][0..4]; - mem.writeIntLittle(u32, ptr, @intCast(u32, text_block.size)); - } - - try dbg_line_buffer.appendSlice(&[_]u8{ DW.LNS_extended_op, 1, DW.LNE_end_sequence }); - - // Now we have the full contents and may allocate a region to store it. - - // This logic is nearly identical to the logic below in `updateDeclDebugInfo` for - // `TextBlock` and the .debug_info. If you are editing this logic, you - // probably need to edit that logic too. - - const dwarf_segment = &self.d_sym.?.load_commands.items[self.d_sym.?.dwarf_segment_cmd_index.?].Segment; - const debug_line_sect = &dwarf_segment.sections.items[self.d_sym.?.debug_line_section_index.?]; - const src_fn = &decl.fn_link.macho; - src_fn.len = @intCast(u32, dbg_line_buffer.items.len); - if (self.d_sym.?.dbg_line_fn_last) |last| { - if (src_fn.next) |next| { - // Update existing function - non-last item. 
- if (src_fn.off + src_fn.len + DebugSymbols.min_nop_size > next.off) { - // It grew too big, so we move it to a new location. - if (src_fn.prev) |prev| { - _ = self.d_sym.?.dbg_line_fn_free_list.put(self.base.allocator, prev, {}) catch {}; - prev.next = src_fn.next; - } - next.prev = src_fn.prev; - src_fn.next = null; - // Populate where it used to be with NOPs. - const file_pos = debug_line_sect.offset + src_fn.off; - try self.d_sym.?.pwriteDbgLineNops(0, &[0]u8{}, src_fn.len, file_pos); - // TODO Look at the free list before appending at the end. - src_fn.prev = last; - last.next = src_fn; - self.d_sym.?.dbg_line_fn_last = src_fn; - - src_fn.off = last.off + (last.len * alloc_num / alloc_den); - } - } else if (src_fn.prev == null) { - // Append new function. - // TODO Look at the free list before appending at the end. - src_fn.prev = last; - last.next = src_fn; - self.d_sym.?.dbg_line_fn_last = src_fn; - - src_fn.off = last.off + (last.len * alloc_num / alloc_den); - } - } else { - // This is the first function of the Line Number Program. 
- self.d_sym.?.dbg_line_fn_first = src_fn; - self.d_sym.?.dbg_line_fn_last = src_fn; - - src_fn.off = self.d_sym.?.dbgLineNeededHeaderBytes(module) * alloc_num / alloc_den; - } - - const last_src_fn = self.d_sym.?.dbg_line_fn_last.?; - const needed_size = last_src_fn.off + last_src_fn.len; - if (needed_size != debug_line_sect.size) { - if (needed_size > dwarf_segment.allocatedSize(debug_line_sect.offset)) { - const new_offset = dwarf_segment.findFreeSpace(needed_size, 1, null); - const existing_size = last_src_fn.off; - - log.debug("moving __debug_line section: {} bytes from 0x{x} to 0x{x}", .{ - existing_size, - debug_line_sect.offset, - new_offset, - }); - - const amt = try self.d_sym.?.file.copyRangeAll(debug_line_sect.offset, self.d_sym.?.file, new_offset, existing_size); - if (amt != existing_size) return error.InputOutput; - debug_line_sect.offset = @intCast(u32, new_offset); - debug_line_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; - } - debug_line_sect.size = needed_size; - self.d_sym.?.load_commands_dirty = true; // TODO look into making only the one section dirty - self.d_sym.?.debug_line_header_dirty = true; - } - const prev_padding_size: u32 = if (src_fn.prev) |prev| src_fn.off - (prev.off + prev.len) else 0; - const next_padding_size: u32 = if (src_fn.next) |next| next.off - (src_fn.off + src_fn.len) else 0; - - // We only have support for one compilation unit so far, so the offsets are directly - // from the .debug_line section. - const file_pos = debug_line_sect.offset + src_fn.off; - try self.d_sym.?.pwriteDbgLineNops(prev_padding_size, dbg_line_buffer.items, next_padding_size, file_pos); - - // .debug_info - End the TAG_subprogram children. - try dbg_info_buffer.append(0); + if (debug_buffers) |*db| { + try self.d_sym.?.commitDeclDebugInfo( + self.base.allocator, + module, + decl, + db, + self.base.options.target, + ); } - // Now we emit the .debug_info types of the Decl. 
These will count towards the size of - // the buffer, so we have to do it before computing the offset, and we can't perform the actual - // relocations yet. - var it = dbg_info_type_relocs.iterator(); - while (it.next()) |entry| { - entry.value.off = @intCast(u32, dbg_info_buffer.items.len); - try self.d_sym.?.addDbgInfoType(entry.key, &dbg_info_buffer, self.base.options.target); - } - - try self.d_sym.?.updateDeclDebugInfoAllocation(self.base.allocator, text_block, @intCast(u32, dbg_info_buffer.items.len)); - - // Now that we have the offset assigned we can finally perform type relocations. - it = dbg_info_type_relocs.iterator(); - while (it.next()) |entry| { - for (entry.value.relocs.items) |off| { - mem.writeIntLittle( - u32, - dbg_info_buffer.items[off..][0..4], - text_block.dbg_info_off + entry.value.off, - ); - } - } - - try self.d_sym.?.writeDeclDebugInfo(text_block, dbg_info_buffer.items); - // Since we updated the vaddr and the size, each corresponding export symbol also needs to be updated. 
const decl_exports = module.decl_exports.get(decl) orelse &[0]*Module.Export{}; try self.updateDeclExports(module, decl, decl_exports); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index a6ac2dfde6..90f8cf9a00 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -10,6 +10,7 @@ const DW = std.dwarf; const leb = std.leb; const Allocator = mem.Allocator; +const build_options = @import("build_options"); const trace = @import("../../tracy.zig").trace; const Module = @import("../../Module.zig"); const Type = @import("../../type.zig").Type; @@ -87,21 +88,21 @@ debug_aranges_section_dirty: bool = false, debug_info_header_dirty: bool = false, debug_line_header_dirty: bool = false, -pub const abbrev_compile_unit = 1; -pub const abbrev_subprogram = 2; -pub const abbrev_subprogram_retvoid = 3; -pub const abbrev_base_type = 4; -pub const abbrev_pad1 = 5; -pub const abbrev_parameter = 6; +const abbrev_compile_unit = 1; +const abbrev_subprogram = 2; +const abbrev_subprogram_retvoid = 3; +const abbrev_base_type = 4; +const abbrev_pad1 = 5; +const abbrev_parameter = 6; /// The reloc offset for the virtual address of a function in its Line Number Program. /// Size is a virtual address integer. -pub const dbg_line_vaddr_reloc_index = 3; +const dbg_line_vaddr_reloc_index = 3; /// The reloc offset for the virtual address of a function in its .debug_info TAG_subprogram. /// Size is a virtual address integer. -pub const dbg_info_low_pc_reloc_index = 1; +const dbg_info_low_pc_reloc_index = 1; -pub const min_nop_size = 2; +const min_nop_size = 2; /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
@@ -888,8 +889,304 @@ fn writeStringTable(self: *DebugSymbols) !void { self.string_table_dirty = false; } +pub fn updateDeclLineNumber(self: *DebugSymbols, module: *Module, decl: *const Module.Decl) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const container_scope = decl.scope.cast(Module.Scope.Container).?; + const tree = container_scope.file_scope.contents.tree; + const file_ast_decls = tree.root_node.decls(); + // TODO Look into improving the performance here by adding a token-index-to-line + // lookup table. Currently this involves scanning over the source code for newlines. + const fn_proto = file_ast_decls[decl.src_index].castTag(.FnProto).?; + const block = fn_proto.getBodyNode().?.castTag(.Block).?; + const line_delta = std.zig.lineDelta(tree.source, 0, tree.token_locs[block.lbrace].start); + const casted_line_off = @intCast(u28, line_delta); + + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const shdr = &dwarf_segment.sections.items[self.debug_line_section_index.?]; + const file_pos = shdr.offset + decl.fn_link.macho.off + getRelocDbgLineOff(); + var data: [4]u8 = undefined; + leb.writeUnsignedFixed(4, &data, casted_line_off); + try self.file.pwriteAll(&data, file_pos); +} + +pub const DeclDebugBuffers = struct { + dbg_line_buffer: std.ArrayList(u8), + dbg_info_buffer: std.ArrayList(u8), + dbg_info_type_relocs: link.File.DbgInfoTypeRelocsTable, +}; + +/// Caller owns the returned memory. 
+pub fn initDeclDebugBuffers( + self: *DebugSymbols, + allocator: *Allocator, + module: *Module, + decl: *Module.Decl, +) !DeclDebugBuffers { + const tracy = trace(@src()); + defer tracy.end(); + + var dbg_line_buffer = std.ArrayList(u8).init(allocator); + var dbg_info_buffer = std.ArrayList(u8).init(allocator); + var dbg_info_type_relocs: link.File.DbgInfoTypeRelocsTable = .{}; + + const typed_value = decl.typed_value.most_recent.typed_value; + switch (typed_value.ty.zigTypeTag()) { + .Fn => { + const zir_dumps = if (std.builtin.is_test) &[0][]const u8{} else build_options.zir_dumps; + if (zir_dumps.len != 0) { + for (zir_dumps) |fn_name| { + if (mem.eql(u8, mem.spanZ(decl.name), fn_name)) { + std.debug.print("\n{}\n", .{decl.name}); + typed_value.val.cast(Value.Payload.Function).?.func.dump(module.*); + } + } + } + + // For functions we need to add a prologue to the debug line program. + try dbg_line_buffer.ensureCapacity(26); + + const line_off: u28 = blk: { + if (decl.scope.cast(Module.Scope.Container)) |container_scope| { + const tree = container_scope.file_scope.contents.tree; + const file_ast_decls = tree.root_node.decls(); + // TODO Look into improving the performance here by adding a token-index-to-line + // lookup table. Currently this involves scanning over the source code for newlines. 
+ const fn_proto = file_ast_decls[decl.src_index].castTag(.FnProto).?; + const block = fn_proto.getBodyNode().?.castTag(.Block).?; + const line_delta = std.zig.lineDelta(tree.source, 0, tree.token_locs[block.lbrace].start); + break :blk @intCast(u28, line_delta); + } else if (decl.scope.cast(Module.Scope.ZIRModule)) |zir_module| { + const byte_off = zir_module.contents.module.decls[decl.src_index].inst.src; + const line_delta = std.zig.lineDelta(zir_module.source.bytes, 0, byte_off); + break :blk @intCast(u28, line_delta); + } else { + unreachable; + } + }; + + dbg_line_buffer.appendSliceAssumeCapacity(&[_]u8{ + DW.LNS_extended_op, + @sizeOf(u64) + 1, + DW.LNE_set_address, + }); + // This is the "relocatable" vaddr, corresponding to `code_buffer` index `0`. + assert(dbg_line_vaddr_reloc_index == dbg_line_buffer.items.len); + dbg_line_buffer.items.len += @sizeOf(u64); + + dbg_line_buffer.appendAssumeCapacity(DW.LNS_advance_line); + // This is the "relocatable" relative line offset from the previous function's end curly + // to this function's begin curly. + assert(getRelocDbgLineOff() == dbg_line_buffer.items.len); + // Here we use a ULEB128-fixed-4 to make sure this field can be overwritten later. + leb.writeUnsignedFixed(4, dbg_line_buffer.addManyAsArrayAssumeCapacity(4), line_off); + + dbg_line_buffer.appendAssumeCapacity(DW.LNS_set_file); + assert(getRelocDbgFileIndex() == dbg_line_buffer.items.len); + // Once we support more than one source file, this will have the ability to be more + // than one possible value. + const file_index = 1; + leb.writeUnsignedFixed(4, dbg_line_buffer.addManyAsArrayAssumeCapacity(4), file_index); + + // Emit a line for the begin curly with prologue_end=false. The codegen will + // do the work of setting prologue_end=true and epilogue_begin=true. + dbg_line_buffer.appendAssumeCapacity(DW.LNS_copy); + + // .debug_info subprogram + const decl_name_with_null = decl.name[0 .. 
mem.lenZ(decl.name) + 1]; + try dbg_info_buffer.ensureCapacity(dbg_info_buffer.items.len + 27 + decl_name_with_null.len); + + const fn_ret_type = typed_value.ty.fnReturnType(); + const fn_ret_has_bits = fn_ret_type.hasCodeGenBits(); + if (fn_ret_has_bits) { + dbg_info_buffer.appendAssumeCapacity(abbrev_subprogram); + } else { + dbg_info_buffer.appendAssumeCapacity(abbrev_subprogram_retvoid); + } + // These get overwritten after generating the machine code. These values are + // "relocations" and have to be in this fixed place so that functions can be + // moved in virtual address space. + assert(dbg_info_low_pc_reloc_index == dbg_info_buffer.items.len); + dbg_info_buffer.items.len += @sizeOf(u64); // DW.AT_low_pc, DW.FORM_addr + assert(getRelocDbgInfoSubprogramHighPC() == dbg_info_buffer.items.len); + dbg_info_buffer.items.len += 4; // DW.AT_high_pc, DW.FORM_data4 + if (fn_ret_has_bits) { + const gop = try dbg_info_type_relocs.getOrPut(allocator, fn_ret_type); + if (!gop.found_existing) { + gop.entry.value = .{ + .off = undefined, + .relocs = .{}, + }; + } + try gop.entry.value.relocs.append(allocator, @intCast(u32, dbg_info_buffer.items.len)); + dbg_info_buffer.items.len += 4; // DW.AT_type, DW.FORM_ref4 + } + dbg_info_buffer.appendSliceAssumeCapacity(decl_name_with_null); // DW.AT_name, DW.FORM_string + mem.writeIntLittle(u32, dbg_info_buffer.addManyAsArrayAssumeCapacity(4), line_off + 1); // DW.AT_decl_line, DW.FORM_data4 + dbg_info_buffer.appendAssumeCapacity(file_index); // DW.AT_decl_file, DW.FORM_data1 + }, + else => { + // TODO implement .debug_info for global variables + }, + } + + return DeclDebugBuffers{ + .dbg_info_buffer = dbg_info_buffer, + .dbg_line_buffer = dbg_line_buffer, + .dbg_info_type_relocs = dbg_info_type_relocs, + }; +} + +pub fn commitDeclDebugInfo( + self: *DebugSymbols, + allocator: *Allocator, + module: *Module, + decl: *Module.Decl, + debug_buffers: *DeclDebugBuffers, + target: std.Target, +) !void { + const tracy = trace(@src()); + 
defer tracy.end(); + + var dbg_line_buffer = &debug_buffers.dbg_line_buffer; + var dbg_info_buffer = &debug_buffers.dbg_info_buffer; + var dbg_info_type_relocs = &debug_buffers.dbg_info_type_relocs; + + const symbol = self.base.local_symbols.items[decl.link.macho.local_sym_index]; + const text_block = &decl.link.macho; + // If the Decl is a function, we need to update the __debug_line program. + const typed_value = decl.typed_value.most_recent.typed_value; + switch (typed_value.ty.zigTypeTag()) { + .Fn => { + // Perform the relocations based on vaddr. + { + const ptr = dbg_line_buffer.items[dbg_line_vaddr_reloc_index..][0..8]; + mem.writeIntLittle(u64, ptr, symbol.n_value); + } + { + const ptr = dbg_info_buffer.items[dbg_info_low_pc_reloc_index..][0..8]; + mem.writeIntLittle(u64, ptr, symbol.n_value); + } + { + const ptr = dbg_info_buffer.items[getRelocDbgInfoSubprogramHighPC()..][0..4]; + mem.writeIntLittle(u32, ptr, @intCast(u32, text_block.size)); + } + + try dbg_line_buffer.appendSlice(&[_]u8{ DW.LNS_extended_op, 1, DW.LNE_end_sequence }); + + // Now we have the full contents and may allocate a region to store it. + + // This logic is nearly identical to the logic below in `updateDeclDebugInfo` for + // `TextBlock` and the .debug_info. If you are editing this logic, you + // probably need to edit that logic too. + + const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + const debug_line_sect = &dwarf_segment.sections.items[self.debug_line_section_index.?]; + const src_fn = &decl.fn_link.macho; + src_fn.len = @intCast(u32, dbg_line_buffer.items.len); + if (self.dbg_line_fn_last) |last| { + if (src_fn.next) |next| { + // Update existing function - non-last item. + if (src_fn.off + src_fn.len + min_nop_size > next.off) { + // It grew too big, so we move it to a new location. 
+ if (src_fn.prev) |prev| { + _ = self.dbg_line_fn_free_list.put(allocator, prev, {}) catch {}; + prev.next = src_fn.next; + } + next.prev = src_fn.prev; + src_fn.next = null; + // Populate where it used to be with NOPs. + const file_pos = debug_line_sect.offset + src_fn.off; + try self.pwriteDbgLineNops(0, &[0]u8{}, src_fn.len, file_pos); + // TODO Look at the free list before appending at the end. + src_fn.prev = last; + last.next = src_fn; + self.dbg_line_fn_last = src_fn; + + src_fn.off = last.off + (last.len * alloc_num / alloc_den); + } + } else if (src_fn.prev == null) { + // Append new function. + // TODO Look at the free list before appending at the end. + src_fn.prev = last; + last.next = src_fn; + self.dbg_line_fn_last = src_fn; + + src_fn.off = last.off + (last.len * alloc_num / alloc_den); + } + } else { + // This is the first function of the Line Number Program. + self.dbg_line_fn_first = src_fn; + self.dbg_line_fn_last = src_fn; + + src_fn.off = self.dbgLineNeededHeaderBytes(module) * alloc_num / alloc_den; + } + + const last_src_fn = self.dbg_line_fn_last.?; + const needed_size = last_src_fn.off + last_src_fn.len; + if (needed_size != debug_line_sect.size) { + if (needed_size > dwarf_segment.allocatedSize(debug_line_sect.offset)) { + const new_offset = dwarf_segment.findFreeSpace(needed_size, 1, null); + const existing_size = last_src_fn.off; + + log.debug("moving __debug_line section: {} bytes from 0x{x} to 0x{x}", .{ + existing_size, + debug_line_sect.offset, + new_offset, + }); + + const amt = try self.file.copyRangeAll(debug_line_sect.offset, self.file, new_offset, existing_size); + if (amt != existing_size) return error.InputOutput; + debug_line_sect.offset = @intCast(u32, new_offset); + debug_line_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + } + debug_line_sect.size = needed_size; + self.load_commands_dirty = true; // TODO look into making only the one section dirty + self.debug_line_header_dirty = true; 
+ } + const prev_padding_size: u32 = if (src_fn.prev) |prev| src_fn.off - (prev.off + prev.len) else 0; + const next_padding_size: u32 = if (src_fn.next) |next| next.off - (src_fn.off + src_fn.len) else 0; + + // We only have support for one compilation unit so far, so the offsets are directly + // from the .debug_line section. + const file_pos = debug_line_sect.offset + src_fn.off; + try self.pwriteDbgLineNops(prev_padding_size, dbg_line_buffer.items, next_padding_size, file_pos); + + // .debug_info - End the TAG_subprogram children. + try dbg_info_buffer.append(0); + }, + else => {}, + } + + // Now we emit the .debug_info types of the Decl. These will count towards the size of + // the buffer, so we have to do it before computing the offset, and we can't perform the actual + // relocations yet. + var it = dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { + entry.value.off = @intCast(u32, dbg_info_buffer.items.len); + try self.addDbgInfoType(entry.key, dbg_info_buffer, target); + } + + try self.updateDeclDebugInfoAllocation(allocator, text_block, @intCast(u32, dbg_info_buffer.items.len)); + + // Now that we have the offset assigned we can finally perform type relocations. + it = dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { + for (entry.value.relocs.items) |off| { + mem.writeIntLittle( + u32, + dbg_info_buffer.items[off..][0..4], + text_block.dbg_info_off + entry.value.off, + ); + } + } + + try self.writeDeclDebugInfo(text_block, dbg_info_buffer.items); +} + /// Asserts the type has codegen bits. 
-pub fn addDbgInfoType( +fn addDbgInfoType( self: *DebugSymbols, ty: Type, dbg_info_buffer: *std.ArrayList(u8), @@ -931,7 +1228,7 @@ pub fn addDbgInfoType( } } -pub fn updateDeclDebugInfoAllocation( +fn updateDeclDebugInfoAllocation( self: *DebugSymbols, allocator: *Allocator, text_block: *TextBlock, @@ -986,7 +1283,7 @@ pub fn updateDeclDebugInfoAllocation( } } -pub fn writeDeclDebugInfo(self: *DebugSymbols, text_block: *TextBlock, dbg_info_buf: []const u8) !void { +fn writeDeclDebugInfo(self: *DebugSymbols, text_block: *TextBlock, dbg_info_buf: []const u8) !void { const tracy = trace(@src()); defer tracy.end(); @@ -1057,19 +1354,19 @@ fn makeDebugString(self: *DebugSymbols, allocator: *Allocator, bytes: []const u8 /// The reloc offset for the line offset of a function from the previous function's line. /// It's a fixed-size 4-byte ULEB128. -pub fn getRelocDbgLineOff() usize { +fn getRelocDbgLineOff() usize { return dbg_line_vaddr_reloc_index + @sizeOf(u64) + 1; } -pub fn getRelocDbgFileIndex() usize { +fn getRelocDbgFileIndex() usize { return getRelocDbgLineOff() + 5; } -pub fn getRelocDbgInfoSubprogramHighPC() u32 { +fn getRelocDbgInfoSubprogramHighPC() u32 { return dbg_info_low_pc_reloc_index + @sizeOf(u64); } -pub fn dbgLineNeededHeaderBytes(self: DebugSymbols, module: *Module) u32 { +fn dbgLineNeededHeaderBytes(self: DebugSymbols, module: *Module) u32 { const directory_entry_format_count = 1; const file_name_entry_format_count = 1; const directory_count = 1; @@ -1092,7 +1389,7 @@ fn dbgInfoNeededHeaderBytes(self: DebugSymbols) u32 { /// are less than 126,976 bytes (if this limit is ever reached, this function can be /// improved to make more than one pwritev call, or the limit can be raised by a fixed /// amount by increasing the length of `vecs`). 
-pub fn pwriteDbgLineNops( +fn pwriteDbgLineNops( self: *DebugSymbols, prev_padding_size: usize, buf: []const u8, @@ -1170,7 +1467,7 @@ pub fn pwriteDbgLineNops( /// Writes to the file a buffer, prefixed and suffixed by the specified number of /// bytes of padding. -pub fn pwriteDbgInfoNops( +fn pwriteDbgInfoNops( self: *DebugSymbols, prev_padding_size: usize, buf: []const u8, @@ -1239,25 +1536,3 @@ pub fn pwriteDbgInfoNops( try self.file.pwritevAll(vecs[0..vec_index], offset - prev_padding_size); } - -pub fn updateDeclLineNumber(self: *DebugSymbols, module: *Module, decl: *const Module.Decl) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const container_scope = decl.scope.cast(Module.Scope.Container).?; - const tree = container_scope.file_scope.contents.tree; - const file_ast_decls = tree.root_node.decls(); - // TODO Look into improving the performance here by adding a token-index-to-line - // lookup table. Currently this involves scanning over the source code for newlines. 
- const fn_proto = file_ast_decls[decl.src_index].castTag(.FnProto).?; - const block = fn_proto.getBodyNode().?.castTag(.Block).?; - const line_delta = std.zig.lineDelta(tree.source, 0, tree.token_locs[block.lbrace].start); - const casted_line_off = @intCast(u28, line_delta); - - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - const shdr = &dwarf_segment.sections.items[self.debug_line_section_index.?]; - const file_pos = shdr.offset + decl.fn_link.macho.off + getRelocDbgLineOff(); - var data: [4]u8 = undefined; - leb.writeUnsignedFixed(4, &data, casted_line_off); - try self.file.pwriteAll(&data, file_pos); -} From 9369176332a11fa92d3a0ed4493a22f4ed2701af Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 31 Dec 2020 08:45:27 +0100 Subject: [PATCH 16/17] macho: advance VM address of DWARF sections when relocating --- src/link/MachO/DebugSymbols.zig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 90f8cf9a00..7103f5e8bf 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -412,7 +412,9 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt const allocated_size = dwarf_segment.allocatedSize(debug_abbrev_sect.offset); if (needed_size > allocated_size) { debug_abbrev_sect.size = 0; // free the space - debug_abbrev_sect.offset = @intCast(u32, dwarf_segment.findFreeSpace(needed_size, 1, null)); + const offset = dwarf_segment.findFreeSpace(needed_size, 1, null); + debug_abbrev_sect.offset = @intCast(u32, offset); + debug_abbrev_sect.addr = dwarf_segment.inner.vmaddr + offset - dwarf_segment.inner.fileoff; } debug_abbrev_sect.size = needed_size; log.debug("__debug_abbrev start=0x{x} end=0x{x}", .{ @@ -536,9 +538,9 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt const allocated_size = 
dwarf_segment.allocatedSize(debug_aranges_sect.offset); if (needed_size > allocated_size) { debug_aranges_sect.size = 0; // free the space - const offset = dwarf_segment.findFreeSpace(needed_size, 16, null); - debug_aranges_sect.offset = @intCast(u32, offset); - debug_aranges_sect.addr = dwarf_segment.inner.vmaddr + offset - dwarf_segment.inner.fileoff; + const new_offset = dwarf_segment.findFreeSpace(needed_size, 16, null); + debug_aranges_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_aranges_sect.offset = @intCast(u32, new_offset); } debug_aranges_sect.size = needed_size; log.debug("__debug_aranges start=0x{x} end=0x{x}", .{ From 0fd3015e558a8b4decf535e75481cdbc29540ff8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 31 Dec 2020 10:15:07 +0100 Subject: [PATCH 17/17] macho: sentinel can be 4 byte long --- src/link/MachO/DebugSymbols.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 7103f5e8bf..e4509281d3 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -525,8 +525,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Opt mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), text_section.size); // Sentinel. - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), 0); - mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), 0); + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), 0); + mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), 0); // Go back and populate the initial length. const init_len = di_buf.items.len - after_init_len;