From 819ef521042e7e21db4ab5dd7a0dbe180bd31c57 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Fri, 13 Aug 2021 15:12:28 +0200
Subject: [PATCH 01/78] macho: merge linkWithZld with flush

---
 src/link/MachO.zig | 421 ++++++++++++++++++++++-----------------
 1 file changed, 207 insertions(+), 214 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 10e079d4f1..04f32ae6c4 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -415,171 +415,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
         }
     }
 
-    const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1;
-    if (use_stage1) {
-        return self.linkWithZld(comp);
-    } else {
-        switch (self.base.options.effectiveOutputMode()) {
-            .Exe, .Obj => {},
-            .Lib => return error.TODOImplementWritingLibFiles,
-        }
-        return self.flushModule(comp);
-    }
-}
-
-pub fn flushModule(self: *MachO, comp: *Compilation) !void {
-    _ = comp;
-    const tracy = trace(@src());
-    defer tracy.end();
-
-    const output_mode = self.base.options.output_mode;
-    const target = self.base.options.target;
-
-    switch (output_mode) {
-        .Exe => {
-            if (self.entry_addr) |addr| {
-                // Update LC_MAIN with entry offset.
-                const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-                const main_cmd = &self.load_commands.items[self.main_cmd_index.?].Main;
-                main_cmd.entryoff = addr - text_segment.inner.vmaddr;
-                main_cmd.stacksize = self.base.options.stack_size_override orelse 0;
-                self.load_commands_dirty = true;
-            }
-            try self.writeRebaseInfoTable();
-            try self.writeBindInfoTable();
-            try self.writeLazyBindInfoTable();
-            try self.writeExportInfo();
-            try self.writeAllGlobalAndUndefSymbols();
-            try self.writeIndirectSymbolTable();
-            try self.writeStringTable();
-            try self.updateLinkeditSegmentSizes();
-
-            if (self.d_sym) |*ds| {
-                // Flush debug symbols bundle.
-                try ds.flushModule(self.base.allocator, self.base.options);
-            }
-
-            if (target.cpu.arch == .aarch64) {
-                // Preallocate space for the code signature.
-                // We need to do this at this stage so that we have the load commands with proper values
-                // written out to the file.
-                // The most important here is to have the correct vm and filesize of the __LINKEDIT segment
-                // where the code signature goes into.
-                try self.writeCodeSignaturePadding();
-            }
-        },
-        .Obj => {},
-        .Lib => return error.TODOImplementWritingLibFiles,
-    }
-
-    try self.writeLoadCommands();
-    try self.writeHeader();
-
-    if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
-        log.debug("flushing. no_entry_point_found = true", .{});
-        self.error_flags.no_entry_point_found = true;
-    } else {
-        log.debug("flushing. no_entry_point_found = false", .{});
-        self.error_flags.no_entry_point_found = false;
-    }
-
-    assert(!self.got_entries_count_dirty);
-    assert(!self.load_commands_dirty);
-    assert(!self.rebase_info_dirty);
-    assert(!self.binding_info_dirty);
-    assert(!self.lazy_binding_info_dirty);
-    assert(!self.export_info_dirty);
-    assert(!self.strtab_dirty);
-    assert(!self.strtab_needs_relocation);
-
-    if (target.cpu.arch == .aarch64) {
-        switch (output_mode) {
-            .Exe, .Lib => try self.writeCodeSignature(), // code signing always comes last
-            else => {},
-        }
-    }
-}
-
-fn resolveSearchDir(
-    arena: *Allocator,
-    dir: []const u8,
-    syslibroot: ?[]const u8,
-) !?[]const u8 {
-    var candidates = std.ArrayList([]const u8).init(arena);
-
-    if (fs.path.isAbsolute(dir)) {
-        if (syslibroot) |root| {
-            const full_path = try fs.path.join(arena, &[_][]const u8{ root, dir });
-            try candidates.append(full_path);
-        }
-    }
-
-    try candidates.append(dir);
-
-    for (candidates.items) |candidate| {
-        // Verify that search path actually exists
-        var tmp = fs.cwd().openDir(candidate, .{}) catch |err| switch (err) {
-            error.FileNotFound => continue,
-            else => |e| return e,
-        };
-        defer tmp.close();
-
-        return candidate;
-    }
-
-    return null;
-}
-
-fn resolveLib(
-    arena: *Allocator,
-    search_dirs: []const []const u8,
-    name: []const u8,
-    ext: []const u8,
-) !?[]const u8 {
-    const search_name = try std.fmt.allocPrint(arena, "lib{s}{s}", .{ name, ext });
-
-    for (search_dirs) |dir| {
-        const full_path = try fs.path.join(arena, &[_][]const u8{ dir, search_name });
-
-        // Check if the file exists.
-        const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) {
-            error.FileNotFound => continue,
-            else => |e| return e,
-        };
-        defer tmp.close();
-
-        return full_path;
-    }
-
-    return null;
-}
-
-fn resolveFramework(
-    arena: *Allocator,
-    search_dirs: []const []const u8,
-    name: []const u8,
-    ext: []const u8,
-) !?[]const u8 {
-    const search_name = try std.fmt.allocPrint(arena, "{s}{s}", .{ name, ext });
-    const prefix_path = try std.fmt.allocPrint(arena, "{s}.framework", .{name});
-
-    for (search_dirs) |dir| {
-        const full_path = try fs.path.join(arena, &[_][]const u8{ dir, prefix_path, search_name });
-
-        // Check if the file exists.
-        const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) {
-            error.FileNotFound => continue,
-            else => |e| return e,
-        };
-        defer tmp.close();
-
-        return full_path;
-    }
-
-    return null;
-}
-
-fn linkWithZld(self: *MachO, comp: *Compilation) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
@@ -588,11 +423,11 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void {
     const arena = &arena_allocator.allocator;
 
     const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type.
+    const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1;
 
     // If there is no Zig code to compile, then we should skip flushing the output file because it
    // will not be part of the linker line anyway.
     const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: {
-        const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1;
         if (use_stage1) {
             const obj_basename = try std.zig.binNameAlloc(arena, .{
                 .root_name = self.base.options.root_name,
@@ -604,8 +439,8 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void {
             break :blk full_obj_path;
         }
 
+        const obj_basename = self.base.intermediary_basename orelse break :blk null;
         try self.flushModule(comp);
-        const obj_basename = self.base.intermediary_basename.?;
         const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename});
         break :blk full_obj_path;
     } else null;
@@ -714,7 +549,9 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void {
         try positionals.append(p);
     }
 
-    try positionals.append(comp.compiler_rt_static_lib.?.full_object_path);
+    if (comp.compiler_rt_static_lib) |lib| {
+        try positionals.append(lib.full_object_path);
+    }
 
     // libc++ dep
     if (self.base.options.link_libcpp) {
@@ -899,56 +736,60 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void {
             Compilation.dump_argv(argv.items);
         }
 
-        const sub_path = self.base.options.emit.?.sub_path;
-        self.base.file = try directory.handle.createFile(sub_path, .{
-            .truncate = true,
-            .read = true,
-            .mode = link.determineMode(self.base.options),
-        });
+        if (use_stage1) {
+            const sub_path = self.base.options.emit.?.sub_path;
+            self.base.file = try directory.handle.createFile(sub_path, .{
+                .truncate = true,
+                .read = true,
+                .mode = link.determineMode(self.base.options),
+            });
 
-        // TODO mimicking insertion of null symbol from incremental linker.
-        // This will need to moved.
-        try self.locals.append(self.base.allocator, .{
-            .n_strx = 0,
-            .n_type = macho.N_UNDF,
-            .n_sect = 0,
-            .n_desc = 0,
-            .n_value = 0,
-        });
-        try self.strtab.append(self.base.allocator, 0);
+            // TODO mimicking insertion of null symbol from incremental linker.
+            // This will need to moved.
+            try self.locals.append(self.base.allocator, .{
+                .n_strx = 0,
+                .n_type = macho.N_UNDF,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            try self.strtab.append(self.base.allocator, 0);
 
-        try self.populateMetadata();
-        try self.addRpathLCs(rpaths.items);
-        try self.parseInputFiles(positionals.items, self.base.options.sysroot);
-        try self.parseLibs(libs.items, self.base.options.sysroot);
-        try self.resolveSymbols();
-        try self.parseTextBlocks();
-        try self.addLoadDylibLCs();
-        try self.addDataInCodeLC();
-        try self.addCodeSignatureLC();
+            try self.populateMetadata();
+            try self.addRpathLCs(rpaths.items);
+            try self.parseInputFiles(positionals.items, self.base.options.sysroot);
+            try self.parseLibs(libs.items, self.base.options.sysroot);
+            try self.resolveSymbols();
+            try self.parseTextBlocks();
+            try self.addLoadDylibLCs();
+            try self.addDataInCodeLC();
+            try self.addCodeSignatureLC();
 
-        {
-            // Add dyld_stub_binder as the final GOT entry.
-            const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
-                .strtab = &self.strtab,
-            }) orelse unreachable;
-            const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
-            const got_index = @intCast(u32, self.got_entries.items.len);
-            const got_entry = GotIndirectionKey{
-                .where = .undef,
-                .where_index = resolv.where_index,
-            };
-            try self.got_entries.append(self.base.allocator, got_entry);
-            try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index);
+        {
+            // Add dyld_stub_binder as the final GOT entry.
+            const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
+                .strtab = &self.strtab,
+            }) orelse unreachable;
+            const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
+            const got_index = @intCast(u32, self.got_entries.items.len);
+            const got_entry = GotIndirectionKey{
+                .where = .undef,
+                .where_index = resolv.where_index,
+            };
+            try self.got_entries.append(self.base.allocator, got_entry);
+            try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index);
+        }
+
+        try self.sortSections();
+        try self.allocateTextSegment();
+        try self.allocateDataConstSegment();
+        try self.allocateDataSegment();
+        self.allocateLinkeditSegment();
+        try self.allocateTextBlocks();
+        try self.flushZld();
+    } else {
+        try self.flushModule(comp);
+    }
-
-        try self.sortSections();
-        try self.allocateTextSegment();
-        try self.allocateDataConstSegment();
-        try self.allocateDataSegment();
-        self.allocateLinkeditSegment();
-        try self.allocateTextBlocks();
-        try self.flushZld();
     }
 
     if (!self.base.options.disable_lld_caching) {
@@ -967,6 +808,158 @@ fn linkWithZld(self: *MachO, comp: *Compilation) !void {
     }
 }
 
+pub fn flushModule(self: *MachO, comp: *Compilation) !void {
+    _ = comp;
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    const output_mode = self.base.options.output_mode;
+    const target = self.base.options.target;
+
+    switch (output_mode) {
+        .Exe => {
+            if (self.entry_addr) |addr| {
+                // Update LC_MAIN with entry offset.
+                const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+                const main_cmd = &self.load_commands.items[self.main_cmd_index.?].Main;
+                main_cmd.entryoff = addr - text_segment.inner.vmaddr;
+                main_cmd.stacksize = self.base.options.stack_size_override orelse 0;
+                self.load_commands_dirty = true;
+            }
+            try self.writeRebaseInfoTable();
+            try self.writeBindInfoTable();
+            try self.writeLazyBindInfoTable();
+            try self.writeExportInfo();
+            try self.writeAllGlobalAndUndefSymbols();
+            try self.writeIndirectSymbolTable();
+            try self.writeStringTable();
+            try self.updateLinkeditSegmentSizes();
+
+            if (self.d_sym) |*ds| {
+                // Flush debug symbols bundle.
+                try ds.flushModule(self.base.allocator, self.base.options);
+            }
+
+            if (target.cpu.arch == .aarch64) {
+                // Preallocate space for the code signature.
+                // We need to do this at this stage so that we have the load commands with proper values
+                // written out to the file.
+                // The most important here is to have the correct vm and filesize of the __LINKEDIT segment
+                // where the code signature goes into.
+                try self.writeCodeSignaturePadding();
+            }
+        },
+        .Obj => {},
+        .Lib => return error.TODOImplementWritingLibFiles,
+    }
+
+    try self.writeLoadCommands();
+    try self.writeHeader();
+
+    if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
+        log.debug("flushing. no_entry_point_found = true", .{});
+        self.error_flags.no_entry_point_found = true;
+    } else {
+        log.debug("flushing. no_entry_point_found = false", .{});
+        self.error_flags.no_entry_point_found = false;
+    }
+
+    assert(!self.got_entries_count_dirty);
+    assert(!self.load_commands_dirty);
+    assert(!self.rebase_info_dirty);
+    assert(!self.binding_info_dirty);
+    assert(!self.lazy_binding_info_dirty);
+    assert(!self.export_info_dirty);
+    assert(!self.strtab_dirty);
+    assert(!self.strtab_needs_relocation);
+
+    if (target.cpu.arch == .aarch64) {
+        switch (output_mode) {
+            .Exe, .Lib => try self.writeCodeSignature(), // code signing always comes last
+            else => {},
+        }
+    }
+}
+
+fn resolveSearchDir(
+    arena: *Allocator,
+    dir: []const u8,
+    syslibroot: ?[]const u8,
+) !?[]const u8 {
+    var candidates = std.ArrayList([]const u8).init(arena);
+
+    if (fs.path.isAbsolute(dir)) {
+        if (syslibroot) |root| {
+            const full_path = try fs.path.join(arena, &[_][]const u8{ root, dir });
+            try candidates.append(full_path);
+        }
+    }
+
+    try candidates.append(dir);
+
+    for (candidates.items) |candidate| {
+        // Verify that search path actually exists
+        var tmp = fs.cwd().openDir(candidate, .{}) catch |err| switch (err) {
+            error.FileNotFound => continue,
+            else => |e| return e,
+        };
+        defer tmp.close();
+
+        return candidate;
+    }
+
+    return null;
+}
+
+fn resolveLib(
+    arena: *Allocator,
+    search_dirs: []const []const u8,
+    name: []const u8,
+    ext: []const u8,
+) !?[]const u8 {
+    const search_name = try std.fmt.allocPrint(arena, "lib{s}{s}", .{ name, ext });
+
+    for (search_dirs) |dir| {
+        const full_path = try fs.path.join(arena, &[_][]const u8{ dir, search_name });
+
+        // Check if the file exists.
+        const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) {
+            error.FileNotFound => continue,
+            else => |e| return e,
+        };
+        defer tmp.close();
+
+        return full_path;
+    }
+
+    return null;
+}
+
+fn resolveFramework(
+    arena: *Allocator,
+    search_dirs: []const []const u8,
+    name: []const u8,
+    ext: []const u8,
+) !?[]const u8 {
+    const search_name = try std.fmt.allocPrint(arena, "{s}{s}", .{ name, ext });
+    const prefix_path = try std.fmt.allocPrint(arena, "{s}.framework", .{name});
+
+    for (search_dirs) |dir| {
+        const full_path = try fs.path.join(arena, &[_][]const u8{ dir, prefix_path, search_name });
+
+        // Check if the file exists.
+        const tmp = fs.cwd().openFile(full_path, .{}) catch |err| switch (err) {
+            error.FileNotFound => continue,
+            else => |e| return e,
+        };
+        defer tmp.close();
+
+        return full_path;
+    }
+
+    return null;
+}
+
 fn parseObject(self: *MachO, path: []const u8) !bool {
     const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) {
         error.FileNotFound => return false,

From b20b6d7da9b63783a4f481fd0a4b48a47843abc8 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sat, 14 Aug 2021 22:18:18 +0200
Subject: [PATCH 02/78] macho: move bit adding rpaths to common codepath

---
 src/link/MachO.zig | 51 ++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 04f32ae6c4..4188e71686 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -536,6 +536,27 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
             try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{});
         }
     } else {
+        if (use_stage1) {
+            const sub_path = self.base.options.emit.?.sub_path;
+            self.base.file = try directory.handle.createFile(sub_path, .{
+                .truncate = true,
+                .read = true,
+                .mode = link.determineMode(self.base.options),
+            });
+            try self.populateMetadata();
+
+            // TODO mimicking insertion of null symbol from incremental linker.
+            // This will need to moved.
+            try self.locals.append(self.base.allocator, .{
+                .n_strx = 0,
+                .n_type = macho.N_UNDF,
+                .n_sect = 0,
+                .n_desc = 0,
+                .n_value = 0,
+            });
+            try self.strtab.append(self.base.allocator, 0);
+        }
+
         // Positional arguments to the linker such as object files and static archives.
         var positionals = std.ArrayList([]const u8).init(arena);
 
@@ -673,12 +694,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
             try rpath_table.putNoClobber(rpath, {});
         }
 
-        var rpaths = std.ArrayList([]const u8).init(arena);
-        try rpaths.ensureCapacity(rpath_table.count());
-        for (rpath_table.keys()) |*key| {
-            rpaths.appendAssumeCapacity(key.*);
-        }
-
         if (self.base.options.verbose_link) {
            var argv = std.ArrayList([]const u8).init(arena);
 
@@ -704,7 +719,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
                 try argv.append(syslibroot);
             }
 
-            for (rpaths.items) |rpath| {
+            for (rpath_table.keys()) |rpath| {
                 try argv.append("-rpath");
                 try argv.append(rpath);
             }
@@ -736,27 +751,9 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
             Compilation.dump_argv(argv.items);
         }
 
+        try self.addRpathLCs(rpath_table.keys());
+
         if (use_stage1) {
-            const sub_path = self.base.options.emit.?.sub_path;
-            self.base.file = try directory.handle.createFile(sub_path, .{
-                .truncate = true,
-                .read = true,
-                .mode = link.determineMode(self.base.options),
-            });
-
-            // TODO mimicking insertion of null symbol from incremental linker.
-            // This will need to moved.
-            try self.locals.append(self.base.allocator, .{
-                .n_strx = 0,
-                .n_type = macho.N_UNDF,
-                .n_sect = 0,
-                .n_desc = 0,
-                .n_value = 0,
-            });
-            try self.strtab.append(self.base.allocator, 0);
-
-            try self.populateMetadata();
-            try self.addRpathLCs(rpaths.items);
             try self.parseInputFiles(positionals.items, self.base.options.sysroot);
             try self.parseLibs(libs.items, self.base.options.sysroot);
             try self.resolveSymbols();

From a51edc978f90fa3b96e4b1d4886030848f97a7b0 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 16 Aug 2021 19:15:04 +0200
Subject: [PATCH 03/78] macho: converge populateMetadata with
 populateMissingMetadata

---
 src/link/MachO.zig | 547 +++++++++++----------------------------
 1 file changed, 127 insertions(+), 420 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index f39ab0d0c2..3c024f6867 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -386,6 +386,36 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio
     try self.populateMissingMetadata();
     try self.writeLocalSymbol(0);
 
+    if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
+        .strtab = &self.strtab,
+    })) {
+        const import_sym_index = @intCast(u32, self.undefs.items.len);
+        const n_strx = try self.makeString("dyld_stub_binder");
+        try self.undefs.append(self.base.allocator, .{
+            .n_strx = n_strx,
+            .n_type = macho.N_UNDF | macho.N_EXT,
+            .n_sect = 0,
+            .n_desc = @intCast(u8, 1) * macho.N_SYMBOL_RESOLVER,
+            .n_value = 0,
+        });
+        try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
+            .where = .undef,
+            .where_index = import_sym_index,
+        });
+        const got_key = GotIndirectionKey{
+            .where = .undef,
+            .where_index = import_sym_index,
+        };
+        const got_index = @intCast(u32, self.got_entries.items.len);
+        try self.got_entries.append(self.base.allocator, got_key);
+        try self.got_entries_map.putNoClobber(self.base.allocator, got_key, got_index);
+        try self.writeGotEntry(got_index);
+        self.binding_info_dirty = true;
+    }
+
+    if (self.stub_helper_stubs_start_off == null) {
+        try self.writeStubHelperPreamble();
+    }
+
     if (self.d_sym) |*ds| {
         try ds.populateMissingMetadata(allocator);
         try ds.writeLocalSymbol(0);
@@ -556,7 +586,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
                 .read = true,
                 .mode = link.determineMode(self.base.options),
             });
-            try self.populateMetadata();
+            try self.populateMissingMetadata();
 
             // TODO mimicking insertion of null symbol from incremental linker.
             // This will need to moved.
@@ -798,6 +828,17 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
         try self.allocateTextBlocks();
         try self.flushZld();
     } else {
+        // TODO this is just a temp; libsystem load command will be autoresolved when parsing libSystem from
+        // the linker line and actually referencing symbols.
+        if (self.libsystem_cmd_index == null) {
+            self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len);
+            var dylib_cmd = try commands.createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0);
+            errdefer dylib_cmd.deinit(self.base.allocator);
+            try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
+            self.load_commands_dirty = true;
+        }
+        try self.addDataInCodeLC();
+        try self.addCodeSignatureLC();
         try self.flushModule(comp);
     }
 }
@@ -2503,328 +2544,6 @@ fn parseTextBlocks(self: *MachO) !void {
     }
 }
 
-fn populateMetadata(self: *MachO) !void {
-    if (self.pagezero_segment_cmd_index == null) {
-        self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .Segment = SegmentCommand.empty("__PAGEZERO", .{
-                .vmsize = 0x100000000, // size always set to 4GB
-            }),
-        });
-    }
-
-    if (self.text_segment_cmd_index == null) {
-        self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .Segment = SegmentCommand.empty("__TEXT", .{
-                .vmaddr = 0x100000000, // always starts at 4GB
-                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE,
-                .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE,
-            }),
-        });
-    }
-
-    if (self.text_section_index == null) {
-        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-        self.text_section_index = @intCast(u16, text_seg.sections.items.len);
-        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
-            .x86_64 => 0,
-            .aarch64 => 2,
-            else => unreachable, // unhandled architecture type
-        };
-        try text_seg.addSection(self.base.allocator, "__text", .{
-            .@"align" = alignment,
-            .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
-        });
-        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
-            .seg = self.text_segment_cmd_index.?,
-            .sect = self.text_section_index.?,
-        });
-    }
-
-    if (self.stubs_section_index == null) {
-        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-        self.stubs_section_index = @intCast(u16, text_seg.sections.items.len);
-        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
-            .x86_64 => 0,
-            .aarch64 => 2,
-            else => unreachable, // unhandled architecture type
-        };
-        const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
-            .x86_64 => 6,
-            .aarch64 => 3 * @sizeOf(u32),
-            else => unreachable, // unhandled architecture type
-        };
-        try text_seg.addSection(self.base.allocator, "__stubs", .{
-            .@"align" = alignment,
-            .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
-            .reserved2 = stub_size,
-        });
-        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
-            .seg = self.text_segment_cmd_index.?,
-            .sect = self.stubs_section_index.?,
-        });
-    }
-
-    if (self.stub_helper_section_index == null) {
-        const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-        self.stub_helper_section_index = @intCast(u16, text_seg.sections.items.len);
-        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
-            .x86_64 => 0,
-            .aarch64 => 2,
-            else => unreachable, // unhandled architecture type
-        };
-        const stub_helper_size: u6 = switch (self.base.options.target.cpu.arch) {
-            .x86_64 => 15,
-            .aarch64 => 6 * @sizeOf(u32),
-            else => unreachable,
-        };
-        try text_seg.addSection(self.base.allocator, "__stub_helper", .{
-            .size = stub_helper_size,
-            .@"align" = alignment,
-            .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
-        });
-        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
-            .seg = self.text_segment_cmd_index.?,
-            .sect = self.stub_helper_section_index.?,
-        });
-    }
-
-    if (self.data_const_segment_cmd_index == null) {
-        self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .Segment = SegmentCommand.empty("__DATA_CONST", .{
-                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
-                .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
-            }),
-        });
-    }
-
-    if (self.got_section_index == null) {
-        const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
-        self.got_section_index = @intCast(u16, data_const_seg.sections.items.len);
-        try data_const_seg.addSection(self.base.allocator, "__got", .{
-            .@"align" = 3, // 2^3 = @sizeOf(u64)
-            .flags = macho.S_NON_LAZY_SYMBOL_POINTERS,
-        });
-        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
-            .seg = self.data_const_segment_cmd_index.?,
-            .sect = self.got_section_index.?,
-        });
-    }
-
-    if (self.data_segment_cmd_index == null) {
-        self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .Segment = SegmentCommand.empty("__DATA", .{
-                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
-                .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
-            }),
-        });
-    }
-
-    if (self.la_symbol_ptr_section_index == null) {
-        const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-        self.la_symbol_ptr_section_index = @intCast(u16, data_seg.sections.items.len);
-        try data_seg.addSection(self.base.allocator, "__la_symbol_ptr", .{
-            .@"align" = 3, // 2^3 = @sizeOf(u64)
-            .flags = macho.S_LAZY_SYMBOL_POINTERS,
-        });
-        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
-            .seg = self.data_segment_cmd_index.?,
-            .sect = self.la_symbol_ptr_section_index.?,
-        });
-    }
-
-    if (self.data_section_index == null) {
-        const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-        self.data_section_index = @intCast(u16, data_seg.sections.items.len);
-        try data_seg.addSection(self.base.allocator, "__data", .{
-            .@"align" = 3, // 2^3 = @sizeOf(u64)
-        });
-        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
-            .seg = self.data_segment_cmd_index.?,
-            .sect = self.data_section_index.?,
-        });
-    }
-
-    if (self.linkedit_segment_cmd_index == null) {
-        self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .Segment = SegmentCommand.empty("__LINKEDIT", .{
-                .maxprot = macho.VM_PROT_READ,
-                .initprot = macho.VM_PROT_READ,
-            }),
-        });
-    }
-
-    if (self.dyld_info_cmd_index == null) {
-        self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .DyldInfoOnly = .{
-                .cmd = macho.LC_DYLD_INFO_ONLY,
-                .cmdsize = @sizeOf(macho.dyld_info_command),
-                .rebase_off = 0,
-                .rebase_size = 0,
-                .bind_off = 0,
-                .bind_size = 0,
-                .weak_bind_off = 0,
-                .weak_bind_size = 0,
-                .lazy_bind_off = 0,
-                .lazy_bind_size = 0,
-                .export_off = 0,
-                .export_size = 0,
-            },
-        });
-    }
-
-    if (self.symtab_cmd_index == null) {
-        self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .Symtab = .{
-                .cmd = macho.LC_SYMTAB,
-                .cmdsize = @sizeOf(macho.symtab_command),
-                .symoff = 0,
-                .nsyms = 0,
-                .stroff = 0,
-                .strsize = 0,
-            },
-        });
-    }
-
-    if (self.dysymtab_cmd_index == null) {
-        self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .Dysymtab = .{
-                .cmd = macho.LC_DYSYMTAB,
-                .cmdsize = @sizeOf(macho.dysymtab_command),
-                .ilocalsym = 0,
-                .nlocalsym = 0,
-                .iextdefsym = 0,
-                .nextdefsym = 0,
-                .iundefsym = 0,
-                .nundefsym = 0,
-                .tocoff = 0,
-                .ntoc = 0,
-                .modtaboff = 0,
-                .nmodtab = 0,
-                .extrefsymoff = 0,
-                .nextrefsyms = 0,
-                .indirectsymoff = 0,
-                .nindirectsyms = 0,
-                .extreloff = 0,
-                .nextrel = 0,
-                .locreloff = 0,
-                .nlocrel = 0,
-            },
-        });
-    }
-
-    if (self.dylinker_cmd_index == null) {
-        self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len);
-        const cmdsize = @intCast(u32, mem.alignForwardGeneric(
-            u64,
-            @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH),
-            @sizeOf(u64),
-        ));
-        var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{
-            .cmd = macho.LC_LOAD_DYLINKER,
-            .cmdsize = cmdsize,
-            .name = @sizeOf(macho.dylinker_command),
-        });
-        dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name);
-        mem.set(u8, dylinker_cmd.data, 0);
-        mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH));
-        try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd });
-    }
-
-    if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) {
-        self.main_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .Main = .{
-                .cmd = macho.LC_MAIN,
-                .cmdsize = @sizeOf(macho.entry_point_command),
-                .entryoff = 0x0,
-                .stacksize = 0,
-            },
-        });
-    }
-
-    if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) {
-        self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len);
-        const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{
-            self.base.options.emit.?.sub_path,
-        });
-        defer self.base.allocator.free(install_name);
-        var dylib_cmd = try commands.createLoadDylibCommand(
-            self.base.allocator,
-            install_name,
-            2,
-            0x10000, // TODO forward user-provided versions
-            0x10000,
-        );
-        errdefer dylib_cmd.deinit(self.base.allocator);
-        dylib_cmd.inner.cmd = macho.LC_ID_DYLIB;
-        try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
-    }
-
-    if (self.source_version_cmd_index == null) {
-        self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .SourceVersion = .{
-                .cmd = macho.LC_SOURCE_VERSION,
-                .cmdsize = @sizeOf(macho.source_version_command),
-                .version = 0x0,
-            },
-        });
-    }
-
-    if (self.build_version_cmd_index == null) {
-        self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len);
-        const cmdsize = @intCast(u32, mem.alignForwardGeneric(
-            u64,
-            @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version),
-            @sizeOf(u64),
-        ));
-        const ver = self.base.options.target.os.version_range.semver.min;
-        const version = ver.major << 16 | ver.minor << 8 | ver.patch;
-        const is_simulator_abi = self.base.options.target.abi == .simulator;
-        var cmd = commands.emptyGenericCommandWithData(macho.build_version_command{
-            .cmd = macho.LC_BUILD_VERSION,
-            .cmdsize = cmdsize,
-            .platform = switch (self.base.options.target.os.tag) {
-                .macos => macho.PLATFORM_MACOS,
-                .ios => if (is_simulator_abi) macho.PLATFORM_IOSSIMULATOR else macho.PLATFORM_IOS,
-                .watchos => if (is_simulator_abi) macho.PLATFORM_WATCHOSSIMULATOR else macho.PLATFORM_WATCHOS,
-                .tvos => if (is_simulator_abi) macho.PLATFORM_TVOSSIMULATOR else macho.PLATFORM_TVOS,
-                else => unreachable,
-            },
-            .minos = version,
-            .sdk = version,
-            .ntools = 1,
-        });
-        const ld_ver = macho.build_tool_version{
-            .tool = macho.TOOL_LD,
-            .version = 0x0,
-        };
-        cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command));
-        mem.set(u8, cmd.data, 0);
-        mem.copy(u8, cmd.data, mem.asBytes(&ld_ver));
-        try self.load_commands.append(self.base.allocator, .{ .BuildVersion = cmd });
-    }
-
-    if (self.uuid_cmd_index == null) {
-        self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len);
-        var uuid_cmd: macho.uuid_command = .{
-            .cmd = macho.LC_UUID,
-            .cmdsize = @sizeOf(macho.uuid_command),
-            .uuid = undefined,
-        };
-        std.crypto.random.bytes(&uuid_cmd.uuid);
-        try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd });
-    }
-}
-
 fn addDataInCodeLC(self: *MachO) !void {
     if (self.data_in_code_cmd_index == null) {
         self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len);
@@ -4004,12 +3723,6 @@ pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {
 }
 
 pub fn populateMissingMetadata(self: *MachO) !void {
-    switch (self.base.options.output_mode) {
-        .Exe => {},
-        .Obj => return error.TODOImplementWritingObjFiles,
-        .Lib => return error.TODOImplementWritingLibFiles,
-    }
-
     if (self.pagezero_segment_cmd_index == null) {
         self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
         try self.load_commands.append(self.base.allocator, .{
@@ -4019,11 +3732,9 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         });
         self.load_commands_dirty = true;
     }
+
     if (self.text_segment_cmd_index == null) {
         self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-        const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE;
-        const initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE;
-
         const program_code_size_hint = self.base.options.program_code_size_hint;
         const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint;
         const ideal_size = self.header_pad + program_code_size_hint + 3 * got_size_hint;
@@ -4036,12 +3747,13 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .vmaddr = 0x100000000, // always starts at 4GB
                 .vmsize = needed_size,
                 .filesize = needed_size,
-                .maxprot = maxprot,
-                .initprot = initprot,
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE,
             }),
         });
         self.load_commands_dirty = true;
     }
+
     if (self.text_section_index == null) {
         const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
         self.text_section_index = @intCast(u16, text_segment.sections.items.len);
@@ -4051,7 +3763,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .aarch64 => 2,
            else => unreachable, // unhandled architecture type
         };
-        const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS;
         const needed_size = self.base.options.program_code_size_hint;
         const off = text_segment.findFreeSpace(needed_size, @as(u16, 1) << alignment, self.header_pad);
 
@@ -4062,10 +3773,15 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .size = @intCast(u32, needed_size),
             .offset = @intCast(u32, off),
             .@"align" = alignment,
-            .flags = flags,
+            .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS |
+                macho.S_ATTR_SOME_INSTRUCTIONS,
+        });
+        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.text_section_index.?,
         });
         self.load_commands_dirty = true;
     }
+
     if (self.stubs_section_index == null) {
         const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
         self.stubs_section_index = @intCast(u16, text_segment.sections.items.len);
@@ -4080,7 +3796,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .aarch64 => 3 * @sizeOf(u32),
             else => unreachable, // unhandled architecture type
         };
-        const flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS;
         const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
         const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad);
         assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment.
@@ -4092,11 +3807,16 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .size = needed_size,
             .offset = @intCast(u32, off),
             .@"align" = alignment,
-            .flags = flags,
+            .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
             .reserved2 = stub_size,
         });
+        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.stubs_section_index.?,
+        });
         self.load_commands_dirty = true;
     }
+
     if (self.stub_helper_section_index == null) {
         const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
         self.stub_helper_section_index = @intCast(u16, text_segment.sections.items.len);
@@ -4106,7 +3826,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
            .aarch64 => 2,
             else => unreachable, // unhandled architecture type
         };
-        const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS;
         const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
         const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad);
         assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment.
@@ -4118,16 +3837,18 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .size = needed_size,
             .offset = @intCast(u32, off),
             .@"align" = alignment,
-            .flags = flags,
+            .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+        });
+        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
+            .seg = self.text_segment_cmd_index.?,
+            .sect = self.stub_helper_section_index.?,
         });
         self.load_commands_dirty = true;
     }
+
     if (self.data_const_segment_cmd_index == null) {
         self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-        const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE;
-        const initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE;
         const address_and_offset = self.nextSegmentAddressAndOffset();
-
         const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
         const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size);
 
@@ -4139,17 +3860,17 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .vmsize = needed_size,
                 .fileoff = address_and_offset.offset,
                 .filesize = needed_size,
-                .maxprot = maxprot,
-                .initprot = initprot,
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
             }),
         });
         self.load_commands_dirty = true;
     }
+
     if (self.got_section_index == null) {
         const dc_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
         self.got_section_index = @intCast(u16, dc_segment.sections.items.len);
-        const flags = macho.S_NON_LAZY_SYMBOL_POINTERS;
         const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
         const off = dc_segment.findFreeSpace(needed_size, @alignOf(u64), null);
         assert(off + needed_size <= dc_segment.inner.fileoff + dc_segment.inner.filesize); // TODO Must expand __DATA_CONST segment.
@@ -4161,16 +3882,18 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .size = needed_size,
             .offset = @intCast(u32, off),
             .@"align" = 3, // 2^3 = @sizeOf(u64)
-            .flags = flags,
+            .flags = macho.S_NON_LAZY_SYMBOL_POINTERS,
+        });
+        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
+            .seg = self.data_const_segment_cmd_index.?,
+            .sect = self.got_section_index.?,
         });
         self.load_commands_dirty = true;
     }
+
     if (self.data_segment_cmd_index == null) {
         self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-        const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE;
-        const initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE;
         const address_and_offset = self.nextSegmentAddressAndOffset();
-
         const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint;
         const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size);
 
@@ -4182,17 +3905,17 @@ pub fn populateMissingMetadata(self: *MachO) !void {
                 .vmsize = needed_size,
                 .fileoff = address_and_offset.offset,
                 .filesize = needed_size,
-                .maxprot = maxprot,
-                .initprot = initprot,
+                .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
+                .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE,
             }),
         });
         self.load_commands_dirty = true;
     }
+
     if (self.la_symbol_ptr_section_index == null) {
         const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
         self.la_symbol_ptr_section_index = @intCast(u16, data_segment.sections.items.len);
-        const flags = macho.S_LAZY_SYMBOL_POINTERS;
         const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
         const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null);
         assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment.
@@ -4204,10 +3927,15 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .size = needed_size,
             .offset = @intCast(u32, off),
             .@"align" = 3, // 2^3 = @sizeOf(u64)
-            .flags = flags,
+            .flags = macho.S_LAZY_SYMBOL_POINTERS,
+        });
+        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.la_symbol_ptr_section_index.?,
         });
         self.load_commands_dirty = true;
     }
+
     if (self.data_section_index == null) {
         const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
         self.data_section_index = @intCast(u16, data_segment.sections.items.len);
@@ -4224,13 +3952,15 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .offset = @intCast(u32, off),
             .@"align" = 3, // 2^3 = @sizeOf(u64)
         });
+        _ = try self.section_ordinals.getOrPut(self.base.allocator, .{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.data_section_index.?,
+        });
         self.load_commands_dirty = true;
     }
+
     if (self.linkedit_segment_cmd_index == null) {
         self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
-
-        const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE;
-        const initprot = macho.VM_PROT_READ;
         const address_and_offset = self.nextSegmentAddressAndOffset();
 
         log.debug("found __LINKEDIT segment free space at 0x{x}", .{address_and_offset.offset});
@@ -4239,12 +3969,13 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .Segment = SegmentCommand.empty("__LINKEDIT", .{
                 .vmaddr = address_and_offset.address,
                 .fileoff = address_and_offset.offset,
-                .maxprot = maxprot,
-                .initprot = initprot,
+                .maxprot = macho.VM_PROT_READ,
+                .initprot = macho.VM_PROT_READ,
             }),
         });
         self.load_commands_dirty = true;
     }
+
     if (self.dyld_info_cmd_index == null) {
         self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len);
 
@@ -4291,6 +4022,7 @@ pub fn populateMissingMetadata(self: *MachO) !void {
 
         self.load_commands_dirty = true;
     }
+
     if (self.symtab_cmd_index == null) {
         self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len);
 
@@ -4323,6 +4055,7 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         self.load_commands_dirty = true;
         self.strtab_dirty = true;
     }
+
     if (self.dysymtab_cmd_index == null) {
         self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len);
 
@@ -4358,6 +4091,7 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         });
         self.load_commands_dirty = true;
     }
+
     if (self.dylinker_cmd_index == null) {
         self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len);
         const cmdsize = @intCast(u32, mem.alignForwardGeneric(
@@ -4376,17 +4110,8 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd });
         self.load_commands_dirty = true;
     }
-    if (self.libsystem_cmd_index == null) {
-        self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len);
-        var dylib_cmd = try commands.createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0);
-        errdefer dylib_cmd.deinit(self.base.allocator);
-
-        try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
-
-        self.load_commands_dirty = true;
-    }
-    if (self.main_cmd_index == null) {
+
+    if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) {
         self.main_cmd_index = @intCast(u16, self.load_commands.items.len);
         try self.load_commands.append(self.base.allocator, .{
             .Main = .{
@@ -4398,6 +4123,38 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         });
         self.load_commands_dirty = true;
     }
+
+    if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) {
+        self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len);
+        const install_name = try std.fmt.allocPrint(self.base.allocator, "@rpath/{s}", .{
+            self.base.options.emit.?.sub_path,
+        });
+        defer self.base.allocator.free(install_name);
+        var dylib_cmd = try commands.createLoadDylibCommand(
+            self.base.allocator,
+            install_name,
+            2,
+            0x10000, // TODO forward user-provided versions
+            0x10000,
+        );
+        errdefer dylib_cmd.deinit(self.base.allocator);
+        dylib_cmd.inner.cmd = macho.LC_ID_DYLIB;
+        try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
+        self.load_commands_dirty = true;
+    }
+
+    if (self.source_version_cmd_index == null) {
+        self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len);
+        try self.load_commands.append(self.base.allocator, .{
+            .SourceVersion = .{
+                .cmd = macho.LC_SOURCE_VERSION,
+                .cmdsize = @sizeOf(macho.source_version_command),
+                .version = 0x0,
+            },
+        });
+        self.load_commands_dirty = true;
+    }
+
     if (self.build_version_cmd_index == null) {
         self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len);
         const cmdsize = @intCast(u32, mem.alignForwardGeneric(
@@ -4430,18 +4187,9 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         mem.set(u8, cmd.data, 0);
         mem.copy(u8, cmd.data, mem.asBytes(&ld_ver));
         try self.load_commands.append(self.base.allocator, .{ .BuildVersion = cmd });
-    }
-    if (self.source_version_cmd_index == null) {
-        self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .SourceVersion = .{
-                .cmd = macho.LC_SOURCE_VERSION,
-                .cmdsize = @sizeOf(macho.source_version_command),
-                .version = 0x0,
-            },
-        });
         self.load_commands_dirty = true;
     }
+
     if (self.uuid_cmd_index == null) {
         self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len);
         var uuid_cmd: macho.uuid_command = .{
@@ -4453,47 +4201,6 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd });
         self.load_commands_dirty = true;
     }
-    if (self.code_signature_cmd_index == null and self.requires_adhoc_codesig) {
-        self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len);
-        try self.load_commands.append(self.base.allocator, .{
-            .LinkeditData = .{
-                .cmd = macho.LC_CODE_SIGNATURE,
-                .cmdsize = @sizeOf(macho.linkedit_data_command),
-                .dataoff = 0,
-                .datasize = 0,
-            },
-        });
-        self.load_commands_dirty = true;
-    }
-    if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
-        .strtab = &self.strtab,
-    })) {
-        const import_sym_index = @intCast(u32, self.undefs.items.len);
-        const n_strx = try self.makeString("dyld_stub_binder");
-        try self.undefs.append(self.base.allocator, .{
-            .n_strx = n_strx,
-            .n_type = macho.N_UNDF | macho.N_EXT,
-            .n_sect = 0,
-            .n_desc = @intCast(u8, 1) * macho.N_SYMBOL_RESOLVER,
-            .n_value = 0,
-        });
-        try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
-            .where = .undef,
-            .where_index = import_sym_index,
-        });
-        const got_key = GotIndirectionKey{
-            .where = .undef,
-            .where_index = import_sym_index,
-        };
-        const got_index = @intCast(u32, self.got_entries.items.len);
-        try self.got_entries.append(self.base.allocator, got_key);
-        try self.got_entries_map.putNoClobber(self.base.allocator, got_key, got_index);
-        try self.writeGotEntry(got_index);
-        self.binding_info_dirty = true;
-    }
-    if (self.stub_helper_stubs_start_off == null) {
-        try self.writeStubHelperPreamble();
-    }
 }

From 52f9b283a77e79f64d3a3b9faf31a07fe350fd8a Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 16 Aug 2021 19:58:02 +0200
Subject: [PATCH 04/78] macho: fix writeSymbolTable() function

After merging `populateMetadata` with `populateMissingMetadata`, it is
imperative that we clear the number of symbols in `writeSymbolTable`.
However, this is hopefully just a temporary measure until the convergence
of the incremental linker with the traditional one is complete.

---
 src/link/MachO.zig | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 3c024f6867..046d5ba6fb 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -597,7 +597,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
                 .n_desc = 0,
                 .n_value = 0,
             });
-            try self.strtab.append(self.base.allocator, 0);
         }
 
         // Positional arguments to the linker such as object files and static archives.
         var positionals = std.ArrayList([]const u8).init(arena);
@@ -2608,7 +2607,6 @@ fn addLoadDylibLCs(self: *MachO) !void {
 }
 
 fn flushZld(self: *MachO) !void {
-    self.load_commands_dirty = true;
     try self.writeTextBlocks();
     try self.writeStubHelperCommon();
 
@@ -3012,7 +3010,7 @@ fn writeSymbolTable(self: *MachO) !void {
     const nexports = self.globals.items.len;
     const nundefs = self.undefs.items.len;
 
-    const locals_off = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64);
+    const locals_off = symtab.symoff + @sizeOf(macho.nlist_64);
     const locals_size = nlocals * @sizeOf(macho.nlist_64);
     log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off });
     try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off);
@@ -3027,7 +3025,7 @@ fn writeSymbolTable(self: *MachO) !void {
     log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off });
     try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off);
 
-    symtab.nsyms += @intCast(u32, nlocals + nexports + nundefs);
+    symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs);
     seg.inner.filesize += locals_size + exports_size + undefs_size;
 
     // Update dynamic symbol table.
@@ -3826,7 +3824,11 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .aarch64 => 2,
             else => unreachable, // unhandled architecture type
         };
-        const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
+        const needed_size: u6 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 15,
+            .aarch64 => 6 * @sizeOf(u32),
+            else => unreachable,
+        };
         const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad);
         assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment.

From c6ea181e75cc6822edbaceccb11bc3a39484d116 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 17 Aug 2021 10:52:36 +0200
Subject: [PATCH 05/78] macho: dedup setting entry point logic

---
 src/link/MachO.zig | 57 ++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 35 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 046d5ba6fb..2074dc6b5d 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -860,46 +860,32 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 
 pub fn flushModule(self: *MachO, comp: *Compilation) !void {
     _ = comp;
+
     const tracy = trace(@src());
     defer tracy.end();
 
-    const output_mode = self.base.options.output_mode;
+    try self.setEntryPoint();
+    try self.writeRebaseInfoTable();
+    try self.writeBindInfoTable();
+    try self.writeLazyBindInfoTable();
+    try self.writeExportInfo();
+    try self.writeAllGlobalAndUndefSymbols();
+    try self.writeIndirectSymbolTable();
+    try self.writeStringTable();
+    try self.updateLinkeditSegmentSizes();
 
-    switch (output_mode) {
-        .Exe => {
-            if (self.entry_addr) |addr| {
-                // Update LC_MAIN with entry offset.
-                const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-                const main_cmd = &self.load_commands.items[self.main_cmd_index.?].Main;
-                main_cmd.entryoff = addr - text_segment.inner.vmaddr;
-                main_cmd.stacksize = self.base.options.stack_size_override orelse 0;
-                self.load_commands_dirty = true;
-            }
-            try self.writeRebaseInfoTable();
-            try self.writeBindInfoTable();
-            try self.writeLazyBindInfoTable();
-            try self.writeExportInfo();
-            try self.writeAllGlobalAndUndefSymbols();
-            try self.writeIndirectSymbolTable();
-            try self.writeStringTable();
-            try self.updateLinkeditSegmentSizes();
+    if (self.d_sym) |*ds| {
+        // Flush debug symbols bundle.
+        try ds.flushModule(self.base.allocator, self.base.options);
+    }
 
-            if (self.d_sym) |*ds| {
-                // Flush debug symbols bundle.
-                try ds.flushModule(self.base.allocator, self.base.options);
-            }
-
-            if (self.requires_adhoc_codesig) {
-                // Preallocate space for the code signature.
-                // We need to do this at this stage so that we have the load commands with proper values
-                // written out to the file.
-                // The most important here is to have the correct vm and filesize of the __LINKEDIT segment
-                // where the code signature goes into.
-                try self.writeCodeSignaturePadding();
-            }
-        },
-        .Obj => {},
-        .Lib => return error.TODOImplementWritingLibFiles,
-    }
+    if (self.requires_adhoc_codesig) {
+        // Preallocate space for the code signature.
+        // We need to do this at this stage so that we have the load commands with proper values
+        // written out to the file.
+        // The most important here is to have the correct vm and filesize of the __LINKEDIT segment
+        // where the code signature goes into.
+        try self.writeCodeSignaturePadding();
+    }
 
     try self.writeLoadCommands();
     try self.writeHeader();
@@ -2704,6 +2690,7 @@ fn setEntryPoint(self: *MachO) !void {
     const ec = &self.load_commands.items[self.main_cmd_index.?].Main;
     ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr);
     ec.stacksize = self.base.options.stack_size_override orelse 0;
+    self.load_commands_dirty = true;
 }
 
 fn writeRebaseInfoTableZld(self: *MachO) !void {

From 69f42817745a13fc3cc69896cb628a7028696a88 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 17 Aug 2021 18:33:41 +0200
Subject: [PATCH 06/78] macho: memorize if dyld_stub_binder was already
 resolved

---
 src/link/MachO.zig | 112 +++++++++++++++++++++++++--------------
 1 file changed, 62 insertions(+), 50 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 2074dc6b5d..73b7b93d11 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -145,6 +145,8 @@ symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{},
 locals_free_list: std.ArrayListUnmanaged(u32) = .{},
 globals_free_list: std.ArrayListUnmanaged(u32) = .{},
 
+dyld_stub_binder_index: ?u32 = null,
+
 stub_helper_stubs_start_off: ?u64 = null,
 
 strtab: std.ArrayListUnmanaged(u8) = .{},
@@ -386,10 +388,8 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio
     try self.populateMissingMetadata();
     try self.writeLocalSymbol(0);
 
-    if (!self.strtab_dir.containsAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
-        .strtab = &self.strtab,
-    })) {
-        const import_sym_index = @intCast(u32, self.undefs.items.len);
+    if (self.dyld_stub_binder_index == null) {
+        self.dyld_stub_binder_index = @intCast(u32, self.undefs.items.len);
         const n_strx = try self.makeString("dyld_stub_binder");
         try self.undefs.append(self.base.allocator, .{
             .n_strx = n_strx,
@@ -400,11 +400,11 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio
         });
         try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
             .where = .undef,
-            .where_index = import_sym_index,
+            .where_index = self.dyld_stub_binder_index.?,
         });
         const got_key = GotIndirectionKey{
             .where = .undef,
-            .where_index = import_sym_index,
+            .where_index = self.dyld_stub_binder_index.?,
         };
         const got_index = @intCast(u32, self.got_entries.items.len);
         try self.got_entries.append(self.base.allocator, got_key);
@@ -799,26 +799,11 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
             try self.parseInputFiles(positionals.items, self.base.options.sysroot);
             try self.parseLibs(libs.items, self.base.options.sysroot);
             try self.resolveSymbols();
+            try self.resolveDyldStubBinder();
             try self.parseTextBlocks();
             try self.addLoadDylibLCs();
             try self.addDataInCodeLC();
            try self.addCodeSignatureLC();
-
-            {
-                // Add dyld_stub_binder as the final GOT entry.
-                const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
-                    .strtab = &self.strtab,
-                }) orelse unreachable;
-                const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
-                const got_index = @intCast(u32, self.got_entries.items.len);
-                const got_entry = GotIndirectionKey{
-                    .where = .undef,
-                    .where_index = resolv.where_index,
-                };
-                try self.got_entries.append(self.base.allocator, got_entry);
-                try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index);
-            }
-
             try self.sortSections();
             try self.allocateTextSegment();
             try self.allocateDataConstSegment();
             try self.allocateDataSegment();
             self.allocateLinkeditSegment();
             try self.allocateTextBlocks();
             try self.flushZld();
@@ -1982,13 +1967,9 @@ fn writeStubHelperCommon(self: *MachO) !void {
             code[9] = 0xff;
             code[10] = 0x25;
             {
-                const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
-                    .strtab = &self.strtab,
-                }) orelse unreachable;
-                const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
                 const got_index = self.got_entries_map.get(.{
                     .where = .undef,
-                    .where_index = resolv.where_index,
+                    .where_index = self.dyld_stub_binder_index.?,
                 }) orelse unreachable;
                 const addr = got.addr + got_index * @sizeOf(u64);
                 const displacement = try math.cast(u32, addr - stub_helper.addr - code_size);
@@ -2033,13 +2014,9 @@ fn writeStubHelperCommon(self: *MachO) !void {
             code[10] = 0xbf;
             code[11] = 0xa9;
             binder_blk_outer: {
-                const n_strx = self.strtab_dir.getAdapted(@as([]const u8, "dyld_stub_binder"), StringSliceAdapter{
-                    .strtab = &self.strtab,
-                }) orelse unreachable;
-                const resolv = self.symbol_resolver.get(n_strx) orelse unreachable;
                 const got_index = self.got_entries_map.get(.{
                     .where = .undef,
-                    .where_index = resolv.where_index,
+                    .where_index = self.dyld_stub_binder_index.?,
                 }) orelse unreachable;
                 const this_addr = stub_helper.addr + 3 * @sizeOf(u32);
                 const target_addr = got.addr + got_index * @sizeOf(u64);
@@ -2405,24 +2382,6 @@ fn resolveSymbols(self: *MachO) !void {
     }
 
     // Third pass, resolve symbols in dynamic libraries.
-    {
-        // Put dyld_stub_binder as an undefined special symbol.
- const n_strx = try self.makeString("dyld_stub_binder"); - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = undef_sym_index, - }); - _ = try unresolved.getOrPut(undef_sym_index); - } - next_sym = 0; loop: while (next_sym < unresolved.count()) { const sym = self.undefs.items[unresolved.keys()[next_sym]]; @@ -2523,6 +2482,59 @@ fn resolveSymbols(self: *MachO) !void { return error.UndefinedSymbolReference; } +fn resolveDyldStubBinder(self: *MachO) !void { + if (self.dyld_stub_binder_index != null) return; + + const n_strx = try self.makeString("dyld_stub_binder"); + const sym_index = @intCast(u32, self.undefs.items.len); + try self.undefs.append(self.base.allocator, .{ + .n_strx = n_strx, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ + .where = .undef, + .where_index = sym_index, + }); + const sym = &self.undefs.items[sym_index]; + const sym_name = self.getString(n_strx); + + for (self.dylibs.items) |dylib, id| { + if (!dylib.symbols.contains(sym_name)) continue; + + const dylib_id = @intCast(u16, id); + if (!self.referenced_dylibs.contains(dylib_id)) { + try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); + } + + const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; + sym.n_type |= macho.N_EXT; + sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; + self.dyld_stub_binder_index = sym_index; + + break; + } + + if (self.dyld_stub_binder_index == null) { + log.err("undefined reference to symbol '{s}'", .{sym_name}); + return error.UndefinedSymbolReference; + } + + // Add dyld_stub_binder as the final GOT entry. + const got_index = @intCast(u32, self.got_entries.items.len); + const got_entry = GotIndirectionKey{ + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + }; + try self.got_entries.append(self.base.allocator, got_entry); + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); + + self.binding_info_dirty = true; + self.got_entries_count_dirty = true; +} + fn parseTextBlocks(self: *MachO) !void { for (self.objects.items) |*object, object_id| { try object.parseTextBlocks(self.base.allocator, @intCast(u16, object_id), self); From 8b795fe2ac17466d0bc9de7b494f0cf076d7c46d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 17 Aug 2021 19:49:17 +0200 Subject: [PATCH 07/78] macho: parse input files and libs in incremental This converges parsing of linker line in incremental; however, still doesn't save the parsing state between updates. --- src/link/MachO.zig | 39 ++++++++------------------------------- 1 file changed, 8 insertions(+), 31 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 73b7b93d11..ece930bb5c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -93,9 +93,6 @@ source_version_cmd_index: ?u16 = null, build_version_cmd_index: ?u16 = null, uuid_cmd_index: ?u16 = null, code_signature_cmd_index: ?u16 = null, -/// Path to libSystem -/// TODO this is obsolete, remove it. -libsystem_cmd_index: ?u16 = null, // __TEXT segment sections text_section_index: ?u16 = null, @@ -281,15 +278,6 @@ const ideal_factor = 2; /// instead but this will do for now. 
const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; -/// Default lib search path -/// TODO instead of hardcoding it, we should probably look through some env vars and search paths -/// instead but this will do for now. -const DEFAULT_LIB_SEARCH_PATH: []const u8 = "/usr/lib"; - -const LIB_SYSTEM_NAME: [*:0]const u8 = "System"; -/// TODO we should search for libSystem and fail if it doesn't exist, instead of hardcoding it -const LIB_SYSTEM_PATH: [*:0]const u8 = DEFAULT_LIB_SEARCH_PATH ++ "/libSystem.B.dylib"; - /// In order for a slice of bytes to be considered eligible to keep metadata pointing at /// it as a possible place to put new symbols, it must have enough room for this many bytes /// (plus extra for reserved capacity). @@ -793,17 +781,17 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { Compilation.dump_argv(argv.items); } + try self.parseInputFiles(positionals.items, self.base.options.sysroot); + try self.parseLibs(libs.items, self.base.options.sysroot); + try self.resolveSymbols(); + try self.resolveDyldStubBinder(); + try self.parseTextBlocks(); try self.addRpathLCs(rpath_table.keys()); + try self.addLoadDylibLCs(); + try self.addDataInCodeLC(); + try self.addCodeSignatureLC(); if (use_stage1) { - try self.parseInputFiles(positionals.items, self.base.options.sysroot); - try self.parseLibs(libs.items, self.base.options.sysroot); - try self.resolveSymbols(); - try self.resolveDyldStubBinder(); - try self.parseTextBlocks(); - try self.addLoadDylibLCs(); - try self.addDataInCodeLC(); - try self.addCodeSignatureLC(); try self.sortSections(); try self.allocateTextSegment(); try self.allocateDataConstSegment(); @@ -812,17 +800,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.allocateTextBlocks(); try self.flushZld(); } else { - // TODO this is just a temp; libsystem load command will be autoresolved when parsing libSystem from - // the linker line and actually referencing symbols. 
-        if (self.libsystem_cmd_index == null) {
-            self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len);
-            var dylib_cmd = try commands.createLoadDylibCommand(self.base.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0);
-            errdefer dylib_cmd.deinit(self.base.allocator);
-            try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
-            self.load_commands_dirty = true;
-        }
-        try self.addDataInCodeLC();
-        try self.addCodeSignatureLC();
         try self.flushModule(comp);
     }
 }

From 790633a2a09164db5985d1c8302a60d3809e5002 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 17 Aug 2021 22:10:07 +0200
Subject: [PATCH 08/78] macho: use common codepath for resolving
 dyld_stub_binder

---
 src/link/MachO.zig | 31 +++++++------------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index ece930bb5c..4fd36d39e4 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -376,30 +376,6 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio
     try self.populateMissingMetadata();
     try self.writeLocalSymbol(0);

-    if (self.dyld_stub_binder_index == null) {
-        self.dyld_stub_binder_index = @intCast(u32, self.undefs.items.len);
-        const n_strx = try self.makeString("dyld_stub_binder");
-        try self.undefs.append(self.base.allocator, .{
-            .n_strx = n_strx,
-            .n_type = macho.N_UNDF | macho.N_EXT,
-            .n_sect = 0,
-            .n_desc = @intCast(u8, 1) * macho.N_SYMBOL_RESOLVER,
-            .n_value = 0,
-        });
-        try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{
-            .where = .undef,
-            .where_index = self.dyld_stub_binder_index.?,
-        });
-        const got_key = GotIndirectionKey{
-            .where = .undef,
-            .where_index = self.dyld_stub_binder_index.?,
-        };
-        const got_index = @intCast(u32, self.got_entries.items.len);
-        try self.got_entries.append(self.base.allocator, got_key);
-        try self.got_entries_map.putNoClobber(self.base.allocator, got_key, got_index);
-        try self.writeGotEntry(got_index);
-        self.binding_info_dirty = true;
-    }
     if (self.stub_helper_stubs_start_off == null) {
         try self.writeStubHelperPreamble();
     }
@@ -1089,6 +1065,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const
             break :full_path try self.base.allocator.dupe(u8, path);
         };
         defer self.base.allocator.free(full_path);
+        log.debug("parsing input file path '{s}'", .{full_path});

         if (try self.parseObject(full_path)) continue;
         if (try self.parseArchive(full_path)) continue;
@@ -1102,6 +1079,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const

 fn parseLibs(self: *MachO, libs: []const []const u8, syslibroot: ?[]const u8) !void {
     for (libs) |lib| {
+        log.debug("parsing lib path '{s}'", .{lib});
         if (try self.parseDylib(lib, .{
             .syslibroot = syslibroot,
         })) continue;
@@ -2510,6 +2488,11 @@ fn resolveDyldStubBinder(self: *MachO) !void {

     self.binding_info_dirty = true;
     self.got_entries_count_dirty = true;
+
+    if (!(build_options.is_stage1 and self.base.options.use_stage1)) {
+        // TODO remove once we can incrementally update in stage1 too.
+        try self.writeGotEntry(got_index);
+    }
 }

From 8167456c58270fe9586ac93dbd6a6ee1a8ae7915 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Wed, 18 Aug 2021 00:17:03 +0200
Subject: [PATCH 09/78] macho: resolve undefs in incremental properly

Instead of assuming that every undef extern symbol comes from libSystem,
actually perform the check!
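The shape of that check, as a minimal sketch with simplified stand-in
types (the real code walks self.dylibs and self.referenced_dylibs on the
MachO struct; the names below are illustrative only, not the linker's
actual API):

    const std = @import("std");

    const Dylib = struct {
        name: []const u8,
        symbols: std.StringHashMapUnmanaged(void) = .{},
    };

    /// Returns the 1-based ordinal of the first dylib that exports
    /// `sym_name`, or null if the symbol remains unresolved.
    fn findDylibOrdinal(dylibs: []const Dylib, sym_name: []const u8) ?u16 {
        for (dylibs) |dylib, id| {
            if (!dylib.symbols.contains(sym_name)) continue;
            return @intCast(u16, id + 1);
        }
        return null;
    }

    test "unknown symbols stay unresolved" {
        const dylibs = [_]Dylib{.{ .name = "libSystem.B.dylib" }};
        try std.testing.expect(findDylibOrdinal(&dylibs, "_missing") == null);
    }

On success the real resolver marks the nlist as an import by or-ing in
macho.N_EXT and storing the ordinal times macho.N_SYMBOL_RESOLVER in
n_desc; on failure the update stays queued for a later flush.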
--- src/link/MachO.zig | 101 ++++++++++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 33 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4fd36d39e4..d5b2163b75 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -174,13 +174,7 @@ has_stabs: bool = false, section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, -pending_updates: std.ArrayListUnmanaged(struct { - kind: enum { - got, - stub, - }, - index: u32, -}) = .{}, +pending_updates: std.ArrayListUnmanaged(PendingUpdate) = .{}, /// A list of text blocks that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added @@ -223,6 +217,12 @@ decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{}, /// somewhere else in the codegen. active_decl: ?*Module.Decl = null, +const PendingUpdate = union(enum) { + resolve_undef: u32, + add_stub_entry: u32, + add_got_entry: u32, +}; + const StringIndexContext = struct { strtab: *std.ArrayListUnmanaged(u8), @@ -761,6 +761,56 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.parseLibs(libs.items, self.base.options.sysroot); try self.resolveSymbols(); try self.resolveDyldStubBinder(); + + // Apply pending updates + var still_pending = std.ArrayList(PendingUpdate).init(self.base.allocator); + defer still_pending.deinit(); + + for (self.pending_updates.items) |update| { + switch (update) { + .resolve_undef => |sym_index| { + const sym = &self.undefs.items[sym_index]; + const sym_name = self.getString(sym.n_strx); + const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; + + for (self.dylibs.items) |dylib, id| { + if (!dylib.symbols.contains(sym_name)) continue; + + const dylib_id = @intCast(u16, id); + if (!self.referenced_dylibs.contains(dylib_id)) { + try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); + } + + const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; + sym.n_type |= macho.N_EXT; + sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; + + break; + } else { + try still_pending.append(update); + log.warn("undefined reference to symbol '{s}'", .{sym_name}); + // TODO self-reference for incremental means resolv.file == 0! + if (self.objects.items.len > 0) { + log.warn(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); + } + } + }, + .add_got_entry => return error.TODOAddGotEntryUpdate, + .add_stub_entry => |stub_index| { + try self.writeStub(stub_index); + try self.writeStubInStubHelper(stub_index); + try self.writeLazySymbolPointer(stub_index); + self.rebase_info_dirty = true; + self.lazy_binding_info_dirty = true; + }, + } + } + + self.pending_updates.clearRetainingCapacity(); + for (still_pending.items) |update| { + self.pending_updates.appendAssumeCapacity(update); + } + try self.parseTextBlocks(); try self.addRpathLCs(rpath_table.keys()); try self.addLoadDylibLCs(); @@ -3488,20 +3538,6 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 // so that we can reapply them when moving/growing sections? 
decl.link.macho.relocs.clearAndFree(self.base.allocator); - // Apply pending updates - while (self.pending_updates.popOrNull()) |update| { - switch (update.kind) { - .got => unreachable, - .stub => { - try self.writeStub(update.index); - try self.writeStubInStubHelper(update.index); - try self.writeLazySymbolPointer(update.index); - self.rebase_info_dirty = true; - self.lazy_binding_info_dirty = true; - }, - } - } - return symbol; } @@ -4281,34 +4317,33 @@ pub fn addExternFn(self: *MachO, name: []const u8) !u32 { return resolv.where_index; } - log.debug("adding new extern function '{s}' with dylib ordinal 1", .{sym_name}); - const import_sym_index = @intCast(u32, self.undefs.items.len); + log.debug("adding new extern function '{s}'", .{sym_name}); + const sym_index = @intCast(u32, self.undefs.items.len); const n_strx = try self.makeString(sym_name); try self.undefs.append(self.base.allocator, .{ .n_strx = n_strx, - .n_type = macho.N_UNDF | macho.N_EXT, + .n_type = macho.N_UNDF, .n_sect = 0, - .n_desc = @intCast(u8, 1) * macho.N_SYMBOL_RESOLVER, + .n_desc = 0, .n_value = 0, }); try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ .where = .undef, - .where_index = import_sym_index, + .where_index = sym_index, }); const stubs_index = @intCast(u32, self.stubs.items.len); - try self.stubs.append(self.base.allocator, import_sym_index); - try self.stubs_map.putNoClobber(self.base.allocator, import_sym_index, stubs_index); + try self.stubs.append(self.base.allocator, sym_index); + try self.stubs_map.putNoClobber(self.base.allocator, sym_index, stubs_index); // TODO discuss this. The caller context expects codegen.InnerError{ OutOfMemory, CodegenFail }, // which obviously doesn't include file writing op errors. So instead of trying to write the stub // entry right here and now, queue it up and dispose of when updating decl. - try self.pending_updates.append(self.base.allocator, .{ - .kind = .stub, - .index = stubs_index, - }); + try self.pending_updates.ensureUnusedCapacity(self.base.allocator, 2); + self.pending_updates.appendAssumeCapacity(.{ .resolve_undef = sym_index }); + self.pending_updates.appendAssumeCapacity(.{ .add_stub_entry = stubs_index }); - return import_sym_index; + return sym_index; } const NextSegmentAddressAndOffset = struct { From 30247fbb6a46231154f051119228829c6a0dfd90 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 18 Aug 2021 11:46:45 +0200 Subject: [PATCH 10/78] macho: remove redundant writeStubHelperCommon codepath --- src/link/MachO.zig | 156 +++++---------------------------------------- 1 file changed, 15 insertions(+), 141 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d5b2163b75..defede3d0f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -376,10 +376,6 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio try self.populateMissingMetadata(); try self.writeLocalSymbol(0); - if (self.stub_helper_stubs_start_off == null) { - try self.writeStubHelperPreamble(); - } - if (self.d_sym) |*ds| { try ds.populateMissingMetadata(allocator); try ds.writeLocalSymbol(0); @@ -762,6 +758,11 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.resolveSymbols(); try self.resolveDyldStubBinder(); + if (!use_stage1) { + // TODO this should be made common when I figure out how to prealloc space for traditional linker path. 
+ try self.writeStubHelperPreamble(); + } + // Apply pending updates var still_pending = std.ArrayList(PendingUpdate).init(self.base.allocator); defer still_pending.deinit(); @@ -824,6 +825,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.allocateDataSegment(); self.allocateLinkeditSegment(); try self.allocateTextBlocks(); + try self.writeStubHelperPreamble(); try self.flushZld(); } else { try self.flushModule(comp); @@ -1943,7 +1945,7 @@ fn writeTextBlocks(self: *MachO) !void { } } -fn writeStubHelperCommon(self: *MachO) !void { +fn writeStubHelperPreamble(self: *MachO) !void { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; @@ -2074,14 +2076,6 @@ fn writeStubHelperCommon(self: *MachO) !void { else => unreachable, } }; - - for (self.stubs.items) |_, i| { - const index = @intCast(u32, i); - // TODO weak bound pointers - try self.writeLazySymbolPointer(index); - try self.writeStub(index); - try self.writeStubInStubHelper(index); - } } fn resolveSymbolsInObject( @@ -2616,7 +2610,14 @@ fn addLoadDylibLCs(self: *MachO) !void { fn flushZld(self: *MachO) !void { try self.writeTextBlocks(); - try self.writeStubHelperCommon(); + + for (self.stubs.items) |_, i| { + const index = @intCast(u32, i); + // TODO weak bound pointers + try self.writeLazySymbolPointer(index); + try self.writeStub(index); + try self.writeStubInStubHelper(index); + } if (self.common_section_index) |index| { const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; @@ -4540,133 +4541,6 @@ fn writeLazySymbolPointer(self: *MachO, index: u32) !void { try self.base.file.?.pwriteAll(&buf, off); } -fn writeStubHelperPreamble(self: *MachO) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr; - const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const displacement = try math.cast(u32, got.addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - self.stub_helper_stubs_start_off = stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr; - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - 
mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x17, pages - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - - // stp x16, x17, [sp, #-16]! - mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.stp( - .x16, - .x17, - aarch64.Register.sp, - aarch64.Instruction.LoadStorePairOffset.pre_index(-16), - ).toU32()); - - binder_blk_outer: { - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = got.addr; - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Jump is too big, replace ldr with adrp and ldr(register). 
- const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x16, pages - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - // ldr x16, x16, offset - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - - // br x16 - mem.writeIntLittle(u32, code[20..24], aarch64.Instruction.br(.x16).toU32()); - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - self.stub_helper_stubs_start_off = stub_helper.offset + code.len; - }, - else => unreachable, - } -} - fn writeStub(self: *MachO, index: u32) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stubs = text_segment.sections.items[self.stubs_section_index.?]; From 153e2317748cff7d59d2709f72fe8b22c14f2a7b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 20 Aug 2021 09:12:26 +0200 Subject: [PATCH 11/78] macho: track unresolved externs globally this way we share state between incremental and traditional paths. --- src/link/MachO.zig | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index defede3d0f..ab8779edb0 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -138,6 +138,7 @@ locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, globals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -2082,7 +2083,6 @@ fn resolveSymbolsInObject( self: *MachO, object_id: u16, tentatives: *std.AutoArrayHashMap(u32, void), - unresolved: *std.AutoArrayHashMap(u32, void), ) !void { const object = &self.objects.items[object_id]; @@ -2181,7 +2181,7 @@ fn resolveSymbolsInObject( .n_desc = 0, .n_value = 0, }; - _ = unresolved.fetchSwapRemove(resolv.where_index); + _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } @@ -2252,7 +2252,7 @@ fn resolveSymbolsInObject( .n_desc = 0, .n_value = 0, }; - _ = unresolved.fetchSwapRemove(resolv.where_index); + _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } } else { @@ -2272,7 +2272,7 @@ fn resolveSymbolsInObject( .where_index = undef_sym_index, .file = object_id, }); - _ = try unresolved.getOrPut(undef_sym_index); + _ = try self.unresolved.getOrPut(self.base.allocator, undef_sym_index); } } } @@ -2281,18 +2281,15 @@ fn resolveSymbols(self: *MachO) !void { var tentatives = std.AutoArrayHashMap(u32, void).init(self.base.allocator); defer tentatives.deinit(); - var unresolved = std.AutoArrayHashMap(u32, void).init(self.base.allocator); - defer unresolved.deinit(); - // First pass, resolve symbols in provided objects. for (self.objects.items) |_, object_id| { - try self.resolveSymbolsInObject(@intCast(u16, object_id), &tentatives, &unresolved); + try self.resolveSymbolsInObject(@intCast(u16, object_id), &tentatives); } // Second pass, resolve symbols in static libraries. 
var next_sym: usize = 0; - loop: while (next_sym < unresolved.count()) { - const sym = self.undefs.items[unresolved.keys()[next_sym]]; + loop: while (next_sym < self.unresolved.count()) { + const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; const sym_name = self.getString(sym.n_strx); for (self.archives.items) |archive| { @@ -2306,7 +2303,7 @@ fn resolveSymbols(self: *MachO) !void { const object_id = @intCast(u16, self.objects.items.len); const object = try self.objects.addOne(self.base.allocator); object.* = try archive.parseObject(self.base.allocator, self.base.options.target, offsets.items[0]); - try self.resolveSymbolsInObject(object_id, &tentatives, &unresolved); + try self.resolveSymbolsInObject(object_id, &tentatives); continue :loop; } @@ -2382,8 +2379,8 @@ fn resolveSymbols(self: *MachO) !void { // Third pass, resolve symbols in dynamic libraries. next_sym = 0; - loop: while (next_sym < unresolved.count()) { - const sym = self.undefs.items[unresolved.keys()[next_sym]]; + loop: while (next_sym < self.unresolved.count()) { + const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; const sym_name = self.getString(sym.n_strx); for (self.dylibs.items) |dylib, id| { @@ -2400,7 +2397,7 @@ fn resolveSymbols(self: *MachO) !void { undef.n_type |= macho.N_EXT; undef.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; - _ = unresolved.fetchSwapRemove(resolv.where_index); + _ = self.unresolved.fetchSwapRemove(resolv.where_index); continue :loop; } @@ -2434,7 +2431,7 @@ fn resolveSymbols(self: *MachO) !void { nlist.n_desc = macho.N_WEAK_DEF; try self.globals.append(self.base.allocator, nlist); - _ = unresolved.fetchSwapRemove(resolv.where_index); + _ = self.unresolved.fetchSwapRemove(resolv.where_index); undef.* = .{ .n_strx = 0, @@ -2468,7 +2465,7 @@ fn resolveSymbols(self: *MachO) !void { } } - for (unresolved.keys()) |index| { + for (self.unresolved.keys()) |index| { const sym = self.undefs.items[index]; const sym_name = self.getString(sym.n_strx); const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; @@ -2477,7 +2474,7 @@ fn resolveSymbols(self: *MachO) !void { log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); } - if (unresolved.count() > 0) + if (self.unresolved.count() > 0) return error.UndefinedSymbolReference; } @@ -3122,6 +3119,7 @@ pub fn deinit(self: *MachO) void { self.locals.deinit(self.base.allocator); self.locals_free_list.deinit(self.base.allocator); self.symbol_resolver.deinit(self.base.allocator); + self.unresolved.deinit(self.base.allocator); for (self.objects.items) |*object| { object.deinit(self.base.allocator); @@ -4332,6 +4330,7 @@ pub fn addExternFn(self: *MachO, name: []const u8) !u32 { .where = .undef, .where_index = sym_index, }); + _ = try self.unresolved.getOrPut(self.base.allocator, sym_index); const stubs_index = @intCast(u32, self.stubs.items.len); try self.stubs.append(self.base.allocator, sym_index); @@ -4340,8 +4339,7 @@ pub fn addExternFn(self: *MachO, name: []const u8) !u32 { // TODO discuss this. The caller context expects codegen.InnerError{ OutOfMemory, CodegenFail }, // which obviously doesn't include file writing op errors. So instead of trying to write the stub // entry right here and now, queue it up and dispose of when updating decl. 
- try self.pending_updates.ensureUnusedCapacity(self.base.allocator, 2); - self.pending_updates.appendAssumeCapacity(.{ .resolve_undef = sym_index }); + try self.pending_updates.ensureUnusedCapacity(self.base.allocator, 1); self.pending_updates.appendAssumeCapacity(.{ .add_stub_entry = stubs_index }); return sym_index; From 792fd9c4a362f44f900007562504bfaceffbcc82 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 20 Aug 2021 09:49:21 +0200 Subject: [PATCH 12/78] macho: extract logic for creating and tracking atoms into fn --- src/link/MachO.zig | 244 ++++++++++++++++++++++++++++++++------------- 1 file changed, 172 insertions(+), 72 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ab8779edb0..f17ecb5a09 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -770,34 +770,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { for (self.pending_updates.items) |update| { switch (update) { - .resolve_undef => |sym_index| { - const sym = &self.undefs.items[sym_index]; - const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; - - for (self.dylibs.items) |dylib, id| { - if (!dylib.symbols.contains(sym_name)) continue; - - const dylib_id = @intCast(u16, id); - if (!self.referenced_dylibs.contains(dylib_id)) { - try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); - } - - const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; - sym.n_type |= macho.N_EXT; - sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; - - break; - } else { - try still_pending.append(update); - log.warn("undefined reference to symbol '{s}'", .{sym_name}); - // TODO self-reference for incremental means resolv.file == 0! - if (self.objects.items.len > 0) { - log.warn(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); - } - } - }, - .add_got_entry => return error.TODOAddGotEntryUpdate, .add_stub_entry => |stub_index| { try self.writeStub(stub_index); try self.writeStubInStubHelper(stub_index); @@ -805,6 +777,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { self.rebase_info_dirty = true; self.lazy_binding_info_dirty = true; }, + else => unreachable, } } @@ -856,6 +829,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { defer tracy.end(); try self.setEntryPoint(); + try self.writeTextBlocks(); try self.writeRebaseInfoTable(); try self.writeBindInfoTable(); try self.writeLazyBindInfoTable(); @@ -1946,6 +1920,174 @@ fn writeTextBlocks(self: *MachO) !void { } } +fn createEmptyAtom( + self: *MachO, + match: MatchingSection, + local_sym_index: u32, + size: u64, + alignment: u32, +) !*TextBlock { + const code = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(code); + mem.set(u8, code, 0); + + const block = try self.base.allocator.create(TextBlock); + errdefer self.base.allocator.destroy(block); + block.* = TextBlock.empty; + block.local_sym_index = local_sym_index; + block.size = size; + block.alignment = alignment; + try block.code.appendSlice(self.base.allocator, code); + + try self.managed_blocks.append(self.base.allocator, block); + + // Update target section's metadata + // TODO should we update segment's size here too? + // How does it tie with incremental space allocs? 
+ const tseg = &self.load_commands.items[match.seg].Segment; + const tsect = &tseg.sections.items[match.sect]; + const new_alignment = math.max(tsect.@"align", alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + size; + tsect.size = new_size; + tsect.@"align" = new_alignment; + + if (self.blocks.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try self.blocks.putNoClobber(self.base.allocator, match, block); + } + + return block; +} + +// fn createStubHelperPreambleAtom(self: *MachO) !void { +// switch (self.base.options.target.cpu.arch) { +// .x86_64 => { +// const code_size = 15; +// var code = try self.base.allocator.alloc(u8, code_size); +// errdefer self.base.allocator.free(code); +// // lea %r11, [rip + disp] +// code[0] = 0x4c; +// code[1] = 0x8d; +// code[2] = 0x1d; +// { +// const target_addr = data.addr + data.size - @sizeOf(u64); +// const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); +// mem.writeIntLittle(u32, code[3..7], displacement); +// } +// // push %r11 +// code[7] = 0x41; +// code[8] = 0x53; +// // jmp [rip + disp] +// code[9] = 0xff; +// code[10] = 0x25; +// { +// const got_index = self.got_entries_map.get(.{ +// .where = .undef, +// .where_index = self.dyld_stub_binder_index.?, +// }) orelse unreachable; +// const addr = got.addr + got_index * @sizeOf(u64); +// const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); +// mem.writeIntLittle(u32, code[11..], displacement); +// } +// try self.base.file.?.pwriteAll(&code, stub_helper.offset); +// break :blk stub_helper.offset + code_size; +// }, +// .aarch64 => { +// var code: [6 * @sizeOf(u32)]u8 = undefined; +// data_blk_outer: { +// const this_addr = stub_helper.addr; +// const target_addr = data.addr + data.size - @sizeOf(u64); +// data_blk: { +// const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; +// // adr x17, disp +// mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); +// // nop +// mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); +// break :data_blk_outer; +// } +// data_blk: { +// const new_this_addr = this_addr + @sizeOf(u32); +// const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; +// // nop +// mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); +// // adr x17, disp +// mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); +// break :data_blk_outer; +// } +// // Jump is too big, replace adr with adrp and add. +// const this_page = @intCast(i32, this_addr >> 12); +// const target_page = @intCast(i32, target_addr >> 12); +// const pages = @intCast(i21, target_page - this_page); +// mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); +// const narrowed = @truncate(u12, target_addr); +// mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); +// } +// // stp x16, x17, [sp, #-16]! 
+// code[8] = 0xf0; +// code[9] = 0x47; +// code[10] = 0xbf; +// code[11] = 0xa9; +// binder_blk_outer: { +// const got_index = self.got_entries_map.get(.{ +// .where = .undef, +// .where_index = self.dyld_stub_binder_index.?, +// }) orelse unreachable; +// const this_addr = stub_helper.addr + 3 * @sizeOf(u32); +// const target_addr = got.addr + got_index * @sizeOf(u64); +// binder_blk: { +// const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; +// const literal = math.cast(u18, displacement) catch break :binder_blk; +// // ldr x16, label +// mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ +// .literal = literal, +// }).toU32()); +// // nop +// mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); +// break :binder_blk_outer; +// } +// binder_blk: { +// const new_this_addr = this_addr + @sizeOf(u32); +// const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; +// const literal = math.cast(u18, displacement) catch break :binder_blk; +// // Pad with nop to please division. +// // nop +// mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); +// // ldr x16, label +// mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ +// .literal = literal, +// }).toU32()); +// break :binder_blk_outer; +// } +// // Use adrp followed by ldr(immediate). +// const this_page = @intCast(i32, this_addr >> 12); +// const target_page = @intCast(i32, target_addr >> 12); +// const pages = @intCast(i21, target_page - this_page); +// mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); +// const narrowed = @truncate(u12, target_addr); +// const offset = try math.divExact(u12, narrowed, 8); +// mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ +// .register = .{ +// .rn = .x16, +// .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), +// }, +// }).toU32()); +// } +// // br x16 +// code[20] = 0x00; +// code[21] = 0x02; +// code[22] = 0x1f; +// code[23] = 0xd6; +// try self.base.file.?.pwriteAll(&code, stub_helper.offset); +// break :blk stub_helper.offset + 6 * @sizeOf(u32); +// }, +// else => unreachable, +// } +// } + fn writeStubHelperPreamble(self: *MachO) !void { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; @@ -2331,9 +2473,6 @@ fn resolveSymbols(self: *MachO) !void { _ = try self.section_ordinals.getOrPut(self.base.allocator, match); const size = sym.n_value; - const code = try self.base.allocator.alloc(u8, size); - defer self.base.allocator.free(code); - mem.set(u8, code, 0); const alignment = (sym.n_desc >> 8) & 0x0f; sym.n_value = 0; @@ -2348,33 +2487,7 @@ fn resolveSymbols(self: *MachO) !void { const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; resolv.local_sym_index = local_sym_index; - const block = try self.base.allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = local_sym_index; - block.size = size; - block.alignment = alignment; - try self.managed_blocks.append(self.base.allocator, block); - - try block.code.appendSlice(self.base.allocator, code); - - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
-        const tseg = &self.load_commands.items[match.seg].Segment;
-        const tsect = &tseg.sections.items[match.sect];
-        const new_alignment = math.max(tsect.@"align", block.alignment);
-        const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment);
-        const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size;
-        tsect.size = new_size;
-        tsect.@"align" = new_alignment;
-
-        if (self.blocks.getPtr(match)) |last| {
-            last.*.next = block;
-            block.prev = last.*;
-            last.* = block;
-        } else {
-            try self.blocks.putNoClobber(self.base.allocator, match, block);
-        }
+        _ = try self.createEmptyAtom(match, local_sym_index, size, alignment);
     }

     // Third pass, resolve symbols in dynamic libraries.
@@ -2449,20 +2562,7 @@ fn resolveSymbols(self: *MachO) !void {
         // We create an empty atom for this symbol.
         // TODO perhaps we should special-case special symbols? Create a separate
         // linked list of atoms?
-        const block = try self.base.allocator.create(TextBlock);
-        block.* = TextBlock.empty;
-        block.local_sym_index = local_sym_index;
-        block.size = 0;
-        block.alignment = 0;
-        try self.managed_blocks.append(self.base.allocator, block);
-
-        if (self.blocks.getPtr(match)) |last| {
-            last.*.next = block;
-            block.prev = last.*;
-            last.* = block;
-        } else {
-            try self.blocks.putNoClobber(self.base.allocator, match, block);
-        }
+        _ = try self.createEmptyAtom(match, local_sym_index, 0, 0);
     }

     for (self.unresolved.keys()) |index| {

From 2d10c52b3cfac9516f5548b04528bb7c31a0a6e7 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sat, 21 Aug 2021 11:51:50 +0200
Subject: [PATCH 13/78] macho: create an explicit symbol and atom for dyld
 entry in __data

This way, we will be able to refer to that atom dynamically via
relocations rather than requiring hardcoded addressing upfront.
---
 src/link/MachO.zig | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index f17ecb5a09..947279a37f 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -143,6 +143,7 @@ unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{},
 locals_free_list: std.ArrayListUnmanaged(u32) = .{},
 globals_free_list: std.ArrayListUnmanaged(u32) = .{},

+dyld_private_sym_index: ?u32 = null,
 dyld_stub_binder_index: ?u32 = null,

 stub_helper_stubs_start_off: ?u64 = null,
@@ -758,6 +759,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
     try self.parseLibs(libs.items, self.base.options.sysroot);
     try self.resolveSymbols();
     try self.resolveDyldStubBinder();
+    try self.createDyldPrivateAtom();

     if (!use_stage1) {
         // TODO this should be made common when I figure out how to prealloc space for traditional linker path.
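// A minimal sketch of the per-section atom bookkeeping that
// createEmptyAtom performs, with simplified stand-in types (the real
// linker uses TextBlock and a blocks map keyed by MatchingSection; Atom
// and appendAtom below are illustrative only). The map holds a section's
// *last* atom, and a new atom is linked onto the tail:
const std = @import("std");

const Atom = struct {
    prev: ?*Atom = null,
    next: ?*Atom = null,
};

fn appendAtom(tail: *?*Atom, atom: *Atom) void {
    if (tail.*) |last| {
        last.next = atom;
        atom.prev = last;
    }
    tail.* = atom;
}

test "atoms chain up per section" {
    var a = Atom{};
    var b = Atom{};
    var tail: ?*Atom = null;
    appendAtom(&tail, &a);
    appendAtom(&tail, &b);
    try std.testing.expect(a.next == &b and b.prev == &a and tail.? == &b);
}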
@@ -1963,7 +1965,31 @@ fn createEmptyAtom(
     return block;
 }

+fn createDyldPrivateAtom(self: *MachO) !void {
+    if (self.dyld_private_sym_index != null) return;
+
+    const match = MatchingSection{
+        .seg = self.data_segment_cmd_index.?,
+        .sect = self.data_section_index.?,
+    };
+    const local_sym_index = @intCast(u32, self.locals.items.len);
+    try self.locals.append(self.base.allocator, .{
+        .n_strx = try self.makeString("dyld_private"),
+        .n_type = macho.N_SECT,
+        .n_sect = 0,
+        .n_desc = 0,
+        .n_value = 0,
+    });
+    _ = try self.createEmptyAtom(match, local_sym_index, @sizeOf(u64), 3);
+
+    self.dyld_private_sym_index = local_sym_index;
+}
+
 // fn createStubHelperPreambleAtom(self: *MachO) !void {
+//     const match = MatchingSection{
+//         .seg = self.text_segment_cmd_index.?,
+//         .sect = self.stub_helper_section_index.?,
+//     };
 //     switch (self.base.options.target.cpu.arch) {
 //         .x86_64 => {
 //             const code_size = 15;

From d63d8ae1c00c240ac20836f1b54bff0093edcb61 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sat, 21 Aug 2021 14:21:55 +0200
Subject: [PATCH 14/78] macho: create __stub_helper preamble atom with
 relocations

The preamble atom carries relocations to the `dyld_private` and
`__dyld_stub_binder` symbols, making the routine properly dynamic (i.e.,
it can be written out before VM allocation takes place).
---
 src/link/MachO.zig | 305 +++++++++++++++++++++++++--------------------
 1 file changed, 171 insertions(+), 134 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 947279a37f..8bb139e624 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -145,6 +145,7 @@ globals_free_list: std.ArrayListUnmanaged(u32) = .{},

 dyld_private_sym_index: ?u32 = null,
 dyld_stub_binder_index: ?u32 = null,
+stub_preamble_sym_index: ?u32 = null,

 stub_helper_stubs_start_off: ?u64 = null,
@@ -760,6 +761,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
     try self.resolveSymbols();
     try self.resolveDyldStubBinder();
     try self.createDyldPrivateAtom();
+    try self.createStubHelperPreambleAtom();

     if (!use_stage1) {
         // TODO this should be made common when I figure out how to prealloc space for traditional linker path.
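// A minimal sketch of what recording relocations on the preamble atom
// buys us: rather than baking a rip-relative displacement into the code
// up front, an (offset, target) pair is stored and patched once addresses
// are known. patchRipRel is a hypothetical helper, not the linker's
// actual resolver:
const std = @import("std");
const mem = std.mem;

fn patchRipRel(code: []u8, offset: u32, source_addr: u64, target_addr: u64) void {
    // x86_64 rip-relative: the displacement is measured from the end of
    // the 4-byte immediate field.
    const next = @intCast(i64, source_addr + offset + 4);
    const disp = @intCast(i32, @intCast(i64, target_addr) - next);
    mem.writeIntLittle(i32, code[offset..][0..4], disp);
}

test "patch the lea in the stub helper preamble" {
    // lea %r11, [rip + disp] occupies bytes 0..7; disp lives at offset 3.
    var code = [_]u8{ 0x4c, 0x8d, 0x1d, 0, 0, 0, 0 };
    patchRipRel(&code, 3, 0x1000, 0x2000);
    try std.testing.expectEqual(@as(i32, 0xff9), mem.readIntLittle(i32, code[3..7]));
}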
@@ -801,7 +803,12 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.allocateDataSegment(); self.allocateLinkeditSegment(); try self.allocateTextBlocks(); - try self.writeStubHelperPreamble(); + { + // TODO just a temp + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.stub_helper_section_index.?]; + self.stub_helper_stubs_start_off = sect.offset + 15; + } try self.flushZld(); } else { try self.flushModule(comp); @@ -1755,11 +1762,9 @@ fn allocateDataSegment(self: *MachO) !void { seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; - // Set la_symbol_ptr and data size + // Set la_symbol_ptr const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; - const data = &seg.sections.items[self.data_section_index.?]; la_symbol_ptr.size += nstubs * @sizeOf(u64); - data.size += @sizeOf(u64); // We need at least 8bytes for address of dyld_stub_binder try self.allocateSegment(self.data_segment_cmd_index.?, 0); } @@ -1985,134 +1990,166 @@ fn createDyldPrivateAtom(self: *MachO) !void { self.dyld_private_sym_index = local_sym_index; } -// fn createStubHelperPreambleAtom(self: *MachO) !void { -// const match = MatchingSection{ -// .seg = self.text_segment_cmd_index.?, -// .sect = self.stub_helper_section_index.?, -// }; -// switch (self.base.options.target.cpu.arch) { -// .x86_64 => { -// const code_size = 15; -// var code = try self.base.allocator.alloc(u8, code_size); -// errdefer self.base.allocator.free(code); -// // lea %r11, [rip + disp] -// code[0] = 0x4c; -// code[1] = 0x8d; -// code[2] = 0x1d; -// { -// const target_addr = data.addr + data.size - @sizeOf(u64); -// const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); -// mem.writeIntLittle(u32, code[3..7], displacement); -// } -// // push %r11 -// code[7] = 0x41; -// code[8] = 0x53; -// // jmp [rip + disp] -// code[9] = 0xff; -// code[10] = 0x25; -// { -// const got_index = self.got_entries_map.get(.{ -// .where = .undef, -// .where_index = self.dyld_stub_binder_index.?, -// }) orelse unreachable; -// const addr = got.addr + got_index * @sizeOf(u64); -// const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); -// mem.writeIntLittle(u32, code[11..], displacement); -// } -// try self.base.file.?.pwriteAll(&code, stub_helper.offset); -// break :blk stub_helper.offset + code_size; -// }, -// .aarch64 => { -// var code: [6 * @sizeOf(u32)]u8 = undefined; -// data_blk_outer: { -// const this_addr = stub_helper.addr; -// const target_addr = data.addr + data.size - @sizeOf(u64); -// data_blk: { -// const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; -// // adr x17, disp -// mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); -// // nop -// mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); -// break :data_blk_outer; -// } -// data_blk: { -// const new_this_addr = this_addr + @sizeOf(u32); -// const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; -// // nop -// mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); -// // adr x17, disp -// mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); -// break :data_blk_outer; -// } -// // Jump is too big, replace adr with adrp and add. 
-// const this_page = @intCast(i32, this_addr >> 12); -// const target_page = @intCast(i32, target_addr >> 12); -// const pages = @intCast(i21, target_page - this_page); -// mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); -// const narrowed = @truncate(u12, target_addr); -// mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); -// } -// // stp x16, x17, [sp, #-16]! -// code[8] = 0xf0; -// code[9] = 0x47; -// code[10] = 0xbf; -// code[11] = 0xa9; -// binder_blk_outer: { -// const got_index = self.got_entries_map.get(.{ -// .where = .undef, -// .where_index = self.dyld_stub_binder_index.?, -// }) orelse unreachable; -// const this_addr = stub_helper.addr + 3 * @sizeOf(u32); -// const target_addr = got.addr + got_index * @sizeOf(u64); -// binder_blk: { -// const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; -// const literal = math.cast(u18, displacement) catch break :binder_blk; -// // ldr x16, label -// mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ -// .literal = literal, -// }).toU32()); -// // nop -// mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); -// break :binder_blk_outer; -// } -// binder_blk: { -// const new_this_addr = this_addr + @sizeOf(u32); -// const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; -// const literal = math.cast(u18, displacement) catch break :binder_blk; -// // Pad with nop to please division. -// // nop -// mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); -// // ldr x16, label -// mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ -// .literal = literal, -// }).toU32()); -// break :binder_blk_outer; -// } -// // Use adrp followed by ldr(immediate). 
-// const this_page = @intCast(i32, this_addr >> 12); -// const target_page = @intCast(i32, target_addr >> 12); -// const pages = @intCast(i21, target_page - this_page); -// mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); -// const narrowed = @truncate(u12, target_addr); -// const offset = try math.divExact(u12, narrowed, 8); -// mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ -// .register = .{ -// .rn = .x16, -// .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), -// }, -// }).toU32()); -// } -// // br x16 -// code[20] = 0x00; -// code[21] = 0x02; -// code[22] = 0x1f; -// code[23] = 0xd6; -// try self.base.file.?.pwriteAll(&code, stub_helper.offset); -// break :blk stub_helper.offset + 6 * @sizeOf(u32); -// }, -// else => unreachable, -// } -// } +fn createStubHelperPreambleAtom(self: *MachO) !void { + if (self.stub_preamble_sym_index != null) return; + const arch = self.base.options.target.cpu.arch; + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + const size: u64 = switch (arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + const alignment: u32 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("stub_preamble"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(match, local_sym_index, size, alignment); + switch (arch) { + .x86_64 => { + // lea %r11, [rip + disp] + atom.code.items[0] = 0x4c; + atom.code.items[1] = 0x8d; + atom.code.items[2] = 0x1d; + try atom.relocs.append(self.base.allocator, .{ + .offset = 3, + .where = .local, + .where_index = self.dyld_private_sym_index.?, + .payload = .{ + .signed = .{ + .addend = 0, + .correction = 0, + }, + }, + }); + // push %r11 + atom.code.items[7] = 0x41; + atom.code.items[8] = 0x53; + // jmp [rip + disp] + atom.code.items[9] = 0xff; + atom.code.items[10] = 0x25; + try atom.relocs.append(self.base.allocator, .{ + .offset = 11, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .load = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + }, + // .aarch64 => { + // var code: [6 * @sizeOf(u32)]u8 = undefined; + // data_blk_outer: { + // const this_addr = stub_helper.addr; + // const target_addr = data.addr + data.size - @sizeOf(u64); + // data_blk: { + // const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; + // // adr x17, disp + // mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); + // // nop + // mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); + // break :data_blk_outer; + // } + // data_blk: { + // const new_this_addr = this_addr + @sizeOf(u32); + // const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; + // // nop + // mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); + // // adr x17, disp + // mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); + // break :data_blk_outer; + // } + // // Jump is too big, replace adr with adrp and add. 
+ // const this_page = @intCast(i32, this_addr >> 12); + // const target_page = @intCast(i32, target_addr >> 12); + // const pages = @intCast(i21, target_page - this_page); + // mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); + // const narrowed = @truncate(u12, target_addr); + // mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); + // } + // // stp x16, x17, [sp, #-16]! + // code[8] = 0xf0; + // code[9] = 0x47; + // code[10] = 0xbf; + // code[11] = 0xa9; + // binder_blk_outer: { + // const got_index = self.got_entries_map.get(.{ + // .where = .undef, + // .where_index = self.dyld_stub_binder_index.?, + // }) orelse unreachable; + // const this_addr = stub_helper.addr + 3 * @sizeOf(u32); + // const target_addr = got.addr + got_index * @sizeOf(u64); + // binder_blk: { + // const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; + // const literal = math.cast(u18, displacement) catch break :binder_blk; + // // ldr x16, label + // mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ + // .literal = literal, + // }).toU32()); + // // nop + // mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); + // break :binder_blk_outer; + // } + // binder_blk: { + // const new_this_addr = this_addr + @sizeOf(u32); + // const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; + // const literal = math.cast(u18, displacement) catch break :binder_blk; + // // Pad with nop to please division. + // // nop + // mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); + // // ldr x16, label + // mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + // .literal = literal, + // }).toU32()); + // break :binder_blk_outer; + // } + // // Use adrp followed by ldr(immediate). + // const this_page = @intCast(i32, this_addr >> 12); + // const target_page = @intCast(i32, target_addr >> 12); + // const pages = @intCast(i21, target_page - this_page); + // mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); + // const narrowed = @truncate(u12, target_addr); + // const offset = try math.divExact(u12, narrowed, 8); + // mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ + // .register = .{ + // .rn = .x16, + // .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), + // }, + // }).toU32()); + // } + // // br x16 + // code[20] = 0x00; + // code[21] = 0x02; + // code[22] = 0x1f; + // code[23] = 0xd6; + // try self.base.file.?.pwriteAll(&code, stub_helper.offset); + // break :blk stub_helper.offset + 6 * @sizeOf(u32); + // }, + else => unreachable, + } + self.stub_preamble_sym_index = local_sym_index; + + // TODO this needs to be fixed + // We already prealloc stub helper size in populateMissingMetadata(), but + // perhaps it's not needed after all? 
+ const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[self.stub_helper_section_index.?]; + sect.size -= size; +} fn writeStubHelperPreamble(self: *MachO) !void { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; @@ -3143,7 +3180,7 @@ fn writeSymbolTable(self: *MachO) !void { const nexports = self.globals.items.len; const nundefs = self.undefs.items.len; - const locals_off = symtab.symoff + @sizeOf(macho.nlist_64); + const locals_off = symtab.symoff; const locals_size = nlocals * @sizeOf(macho.nlist_64); log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); @@ -3163,7 +3200,7 @@ fn writeSymbolTable(self: *MachO) !void { // Update dynamic symbol table. const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym += @intCast(u32, nlocals); + dysymtab.nlocalsym = @intCast(u32, nlocals); dysymtab.iextdefsym = dysymtab.nlocalsym; dysymtab.nextdefsym = @intCast(u32, nexports); dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; From d61d85abd2b998fea438e7317568537f927fa6a1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 21 Aug 2021 18:55:15 +0200 Subject: [PATCH 15/78] macho: implement aarch64 prong of createStubHelperAtom --- src/link/MachO.zig | 169 +++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 91 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8bb139e624..f6dd0ddaba 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -807,7 +807,11 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { // TODO just a temp const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const sect = seg.sections.items[self.stub_helper_section_index.?]; - self.stub_helper_stubs_start_off = sect.offset + 15; + self.stub_helper_stubs_start_off = sect.offset + switch (self.base.options.target.cpu.arch) { + .x86_64 => @intCast(u64, 15), + .aarch64 => @intCast(u64, 6 * @sizeOf(u32)), + else => unreachable, + }; } try self.flushZld(); } else { @@ -2018,11 +2022,12 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { const atom = try self.createEmptyAtom(match, local_sym_index, size, alignment); switch (arch) { .x86_64 => { + try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); // lea %r11, [rip + disp] atom.code.items[0] = 0x4c; atom.code.items[1] = 0x8d; atom.code.items[2] = 0x1d; - try atom.relocs.append(self.base.allocator, .{ + atom.relocs.appendAssumeCapacity(.{ .offset = 3, .where = .local, .where_index = self.dyld_private_sym_index.?, @@ -2039,7 +2044,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { // jmp [rip + disp] atom.code.items[9] = 0xff; atom.code.items[10] = 0x25; - try atom.relocs.append(self.base.allocator, .{ + atom.relocs.appendAssumeCapacity(.{ .offset = 11, .where = .undef, .where_index = self.dyld_stub_binder_index.?, @@ -2051,94 +2056,76 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { }, }); }, - // .aarch64 => { - // var code: [6 * @sizeOf(u32)]u8 = undefined; - // data_blk_outer: { - // const this_addr = stub_helper.addr; - // const target_addr = data.addr + data.size - @sizeOf(u64); - // data_blk: { - // const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // // adr x17, disp - // mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // // 
nop - // mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - // break :data_blk_outer; - // } - // data_blk: { - // const new_this_addr = this_addr + @sizeOf(u32); - // const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // // nop - // mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // // adr x17, disp - // mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - // break :data_blk_outer; - // } - // // Jump is too big, replace adr with adrp and add. - // const this_page = @intCast(i32, this_addr >> 12); - // const target_page = @intCast(i32, target_addr >> 12); - // const pages = @intCast(i21, target_page - this_page); - // mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - // const narrowed = @truncate(u12, target_addr); - // mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - // } - // // stp x16, x17, [sp, #-16]! - // code[8] = 0xf0; - // code[9] = 0x47; - // code[10] = 0xbf; - // code[11] = 0xa9; - // binder_blk_outer: { - // const got_index = self.got_entries_map.get(.{ - // .where = .undef, - // .where_index = self.dyld_stub_binder_index.?, - // }) orelse unreachable; - // const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - // const target_addr = got.addr + got_index * @sizeOf(u64); - // binder_blk: { - // const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - // const literal = math.cast(u18, displacement) catch break :binder_blk; - // // ldr x16, label - // mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - // .literal = literal, - // }).toU32()); - // // nop - // mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - // break :binder_blk_outer; - // } - // binder_blk: { - // const new_this_addr = this_addr + @sizeOf(u32); - // const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - // const literal = math.cast(u18, displacement) catch break :binder_blk; - // // Pad with nop to please division. - // // nop - // mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // // ldr x16, label - // mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - // .literal = literal, - // }).toU32()); - // break :binder_blk_outer; - // } - // // Use adrp followed by ldr(immediate). 
- // const this_page = @intCast(i32, this_addr >> 12); - // const target_page = @intCast(i32, target_addr >> 12); - // const pages = @intCast(i21, target_page - this_page); - // mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - // const narrowed = @truncate(u12, target_addr); - // const offset = try math.divExact(u12, narrowed, 8); - // mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - // .register = .{ - // .rn = .x16, - // .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - // }, - // }).toU32()); - // } - // // br x16 - // code[20] = 0x00; - // code[21] = 0x02; - // code[22] = 0x1f; - // code[23] = 0xd6; - // try self.base.file.?.pwriteAll(&code, stub_helper.offset); - // break :blk stub_helper.offset + 6 * @sizeOf(u32); - // }, + .aarch64 => { + try atom.relocs.ensureUnusedCapacity(self.base.allocator, 4); + // adrp x17, 0 + mem.writeIntLittle(u32, atom.code.items[0..][0..4], aarch64.Instruction.adrp(.x17, 0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 0, + .where = .local, + .where_index = self.dyld_private_sym_index.?, + .payload = .{ + .page = .{ + .kind = .page, + .addend = 0, + }, + }, + }); + // add x17, x17, 0 + mem.writeIntLittle(u32, atom.code.items[4..][0..4], aarch64.Instruction.add(.x17, .x17, 0, false).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + .where_index = self.dyld_private_sym_index.?, + .payload = .{ + .page_off = .{ + .kind = .page, + .addend = 0, + .op_kind = .arithmetic, + }, + }, + }); + // stp x16, x17, [sp, #-16]! + mem.writeIntLittle(u32, atom.code.items[8..][0..4], aarch64.Instruction.stp( + .x16, + .x17, + aarch64.Register.sp, + aarch64.Instruction.LoadStorePairOffset.pre_index(-16), + ).toU32()); + // adrp x16, 0 + mem.writeIntLittle(u32, atom.code.items[12..][0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 12, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .page = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + // ldr x16, [x16, 0] + mem.writeIntLittle(u32, atom.code.items[16..][0..4], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(0), + }, + }).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 16, + .where = .undef, + .where_index = self.dyld_stub_binder_index.?, + .payload = .{ + .page_off = .{ + .kind = .got, + .addend = 0, + }, + }, + }); + // br x16 + mem.writeIntLittle(u32, atom.code.items[20..][0..4], aarch64.Instruction.br(.x16).toU32()); + }, else => unreachable, } self.stub_preamble_sym_index = local_sym_index; From ea499203fe22dd3cd40753c020794790f32b91be Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 21 Aug 2021 22:26:02 +0200 Subject: [PATCH 16/78] macho: incrementally write dyld_private and stub_helper atoms By incrementally I mean using the incremental linker machinery and concepts. Currently, lots of repetition but already highlighted a potential problem with resolving relocations for symbols that weren't seen yet but wanting to write the atom to file (before seeing the relevant atoms). 
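In concrete terms, the allocation scheme both code paths repeat is: place the
atom at the next aligned address past the end of the previous atom in its
section (or at the section's base address for the first atom), then mirror
that virtual address into a file offset as sect.offset + vaddr - sect.addr.
Below is a minimal, self-contained sketch of that arithmetic; the Atom struct
and allocateAtomVaddr helper are stand-ins for the linker's TextBlock and
allocation routine, and the addresses in the test are made up for
illustration:

    const std = @import("std");
    const math = std.math;
    const mem = std.mem;

    const Atom = struct {
        alignment: u32, // power-of-two exponent, same convention as TextBlock
        size: u64,
    };

    // Next aligned VM address for `atom`, given the end of the previous
    // atom (or the section start address if the atom is first).
    fn allocateAtomVaddr(base_addr: u64, atom: Atom) !u64 {
        const alignment = try math.powi(u64, 2, atom.alignment);
        return mem.alignForwardGeneric(u64, base_addr, alignment);
    }

    test "atom placement mirrors VM layout into the file" {
        const atom = Atom{ .alignment = 3, .size = 8 };
        // Previous atom ends at 0x1009; an 8-byte aligned atom lands at 0x1010.
        const vaddr = try allocateAtomVaddr(0x1009, atom);
        try std.testing.expectEqual(@as(u64, 0x1010), vaddr);
        // File offset as computed when writing the atom:
        // sect.offset + vaddr - sect.addr.
        const sect_addr: u64 = 0x1000;
        const sect_offset: u64 = 0x4000;
        try std.testing.expectEqual(@as(u64, 0x4010), sect_offset + vaddr - sect_addr);
    }

The relocation problem mentioned above follows directly from this layout:
writing an atom to the file requires resolving its relocations, and that in
turn requires final addresses for every symbol the atom references, so an
atom that points at a not-yet-allocated atom cannot be flushed yet.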
--- src/link/MachO.zig | 194 +++++++++++++-------------------------------- 1 file changed, 56 insertions(+), 138 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f6dd0ddaba..fe7c7789bf 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -764,8 +764,14 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.createStubHelperPreambleAtom(); if (!use_stage1) { - // TODO this should be made common when I figure out how to prealloc space for traditional linker path. - try self.writeStubHelperPreamble(); + // TODO just a temp + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.stub_helper_section_index.?]; + self.stub_helper_stubs_start_off = sect.offset + switch (self.base.options.target.cpu.arch) { + .x86_64 => @intCast(u64, 15), + .aarch64 => @intCast(u64, 6 * @sizeOf(u32)), + else => unreachable, + }; } // Apply pending updates @@ -842,7 +848,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { defer tracy.end(); try self.setEntryPoint(); - try self.writeTextBlocks(); try self.writeRebaseInfoTable(); try self.writeBindInfoTable(); try self.writeLazyBindInfoTable(); @@ -1989,7 +1994,29 @@ fn createDyldPrivateAtom(self: *MachO) !void { .n_desc = 0, .n_value = 0, }); - _ = try self.createEmptyAtom(match, local_sym_index, @sizeOf(u64), 3); + const last = self.blocks.get(match); + const atom = try self.createEmptyAtom(match, local_sym_index, @sizeOf(u64), 3); + + if (!(build_options.is_stage1 and self.base.options.use_stage1)) { + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const base_addr = if (last) |last_atom| blk: { + const last_atom_sym = self.locals.items[last_atom.local_sym_index]; + break :blk last_atom_sym.n_value; + } else sect.addr; + const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); + + const sym = &self.locals.items[local_sym_index]; + sym.n_value = vaddr; + sym.n_sect = n_sect; + + const file_offset = sect.offset + vaddr - sect.addr; + log.debug("writing code for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); + try self.base.file.?.pwriteAll(atom.code.items, file_offset); + try self.writeLocalSymbol(local_sym_index); + } self.dyld_private_sym_index = local_sym_index; } @@ -2019,6 +2046,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { .n_desc = 0, .n_value = 0, }); + const last = self.blocks.get(match); const atom = try self.createEmptyAtom(match, local_sym_index, size, alignment); switch (arch) { .x86_64 => { @@ -2130,6 +2158,29 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { } self.stub_preamble_sym_index = local_sym_index; + if (!(build_options.is_stage1 and self.base.options.use_stage1)) { + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const base_addr = if (last) |last_atom| blk: { + const last_atom_sym = self.locals.items[last_atom.local_sym_index]; + break :blk last_atom_sym.n_value; + } else sect.addr; + const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); + + const sym = &self.locals.items[local_sym_index]; + sym.n_value = vaddr; + sym.n_sect = n_sect; + + try atom.resolveRelocs(self); + + const file_offset = sect.offset + vaddr - sect.addr; + log.debug("writing code for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); + try self.base.file.?.pwriteAll(atom.code.items, file_offset); + try self.writeLocalSymbol(local_sym_index); + } + // TODO this needs to be fixed // We already prealloc stub helper size in populateMissingMetadata(), but // perhaps it's not needed after all? @@ -2138,139 +2189,6 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { sect.size -= size; } -fn writeStubHelperPreamble(self: *MachO) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data = &data_segment.sections.items[self.data_section_index.?]; - - self.stub_helper_stubs_start_off = blk: { - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const code_size = 15; - var code: [code_size]u8 = undefined; - // lea %r11, [rip + disp] - code[0] = 0x4c; - code[1] = 0x8d; - code[2] = 0x1d; - { - const target_addr = data.addr + data.size - @sizeOf(u64); - const displacement = try math.cast(u32, target_addr - stub_helper.addr - 7); - mem.writeIntLittle(u32, code[3..7], displacement); - } - // push %r11 - code[7] = 0x41; - code[8] = 0x53; - // jmp [rip + disp] - code[9] = 0xff; - code[10] = 0x25; - { - const got_index = self.got_entries_map.get(.{ - .where = .undef, - .where_index = self.dyld_stub_binder_index.?, - }) orelse unreachable; - const addr = got.addr + got_index * @sizeOf(u64); - const displacement = try math.cast(u32, addr - stub_helper.addr - code_size); - mem.writeIntLittle(u32, code[11..], displacement); - } - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + code_size; - }, - .aarch64 => { - var code: [6 * @sizeOf(u32)]u8 = undefined; - data_blk_outer: { - const this_addr = stub_helper.addr; - const target_addr = data.addr + data.size - @sizeOf(u64); - data_blk: { - const displacement = math.cast(i21, target_addr - this_addr) catch break :data_blk; - // adr x17, disp - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, displacement).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :data_blk_outer; - } - data_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.cast(i21, target_addr - new_this_addr) catch break :data_blk; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // adr x17, disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.adr(.x17, displacement).toU32()); - break :data_blk_outer; - } - // Jump is too big, replace adr with adrp and add. 
- const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x17, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.add(.x17, .x17, narrowed, false).toU32()); - } - // stp x16, x17, [sp, #-16]! - code[8] = 0xf0; - code[9] = 0x47; - code[10] = 0xbf; - code[11] = 0xa9; - binder_blk_outer: { - const got_index = self.got_entries_map.get(.{ - .where = .undef, - .where_index = self.dyld_stub_binder_index.?, - }) orelse unreachable; - const this_addr = stub_helper.addr + 3 * @sizeOf(u32); - const target_addr = got.addr + got_index * @sizeOf(u64); - binder_blk: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // ldr x16, label - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.nop().toU32()); - break :binder_blk_outer; - } - binder_blk: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :binder_blk; - const literal = math.cast(u18, displacement) catch break :binder_blk; - // Pad with nop to please division. - // nop - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.nop().toU32()); - // ldr x16, label - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :binder_blk_outer; - } - // Use adrp followed by ldr(immediate). - const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - mem.writeIntLittle(u32, code[16..20], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - code[20] = 0x00; - code[21] = 0x02; - code[22] = 0x1f; - code[23] = 0xd6; - try self.base.file.?.pwriteAll(&code, stub_helper.offset); - break :blk stub_helper.offset + 6 * @sizeOf(u32); - }, - else => unreachable, - } - }; -} - fn resolveSymbolsInObject( self: *MachO, object_id: u16, @@ -4873,7 +4791,7 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { const undefs_off = globals_off + globals_size; const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing extern symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); + log.debug("writing undef symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); // Update dynamic symbol table. 
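The stub-helper entries that the next patch synthesizes as atoms are small
and fixed-size. On x86_64 each entry is 10 bytes: a pushq whose 32-bit
immediate starts out as a zero placeholder (later patched with the symbol's
offset into the lazy-bind opcode stream), followed by a jmpq whose
displacement is filled in by a .branch relocation against the preamble
symbol. A standalone sketch of the encoding; stubHelperEntry is a
hypothetical helper and the displacement below is an arbitrary constant,
whereas in the linker it comes from resolving the relocation:

    const std = @import("std");
    const mem = std.mem;

    // One x86_64 __stub_helper entry: pushq <bind_offset>; jmpq <disp>.
    fn stubHelperEntry(bind_offset: u32, disp: i32) [10]u8 {
        var code: [10]u8 = undefined;
        code[0] = 0x68; // pushq imm32 (placeholder until bind offsets are known)
        mem.writeIntLittle(u32, code[1..5], bind_offset);
        code[5] = 0xe9; // jmpq rel32 (resolved via the .branch reloc at offset 6)
        mem.writeIntLittle(u32, code[6..10], @bitCast(u32, disp));
        return code;
    }

    test "entry layout matches the relocation offsets used by the linker" {
        const entry = stubHelperEntry(0, -64);
        try std.testing.expectEqual(@as(u8, 0x68), entry[0]);
        try std.testing.expectEqual(@as(u8, 0xe9), entry[5]);
        try std.testing.expectEqual(@as(u32, 0xffffffc0), mem.readIntLittle(u32, entry[6..10]));
    }

The aarch64 flavor is analogous at 12 bytes: an ldr of w16 from a trailing
literal word (the placeholder), an unconditional branch carrying the
relocation to the preamble, and the placeholder word itself.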
From 799c5bb9551dafd76f9d1fce7d3f5a01ac55da83 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 23 Aug 2021 22:12:04 +0200 Subject: [PATCH 17/78] macho: add routine for creating a dynamic stub_helper atom With this routine, we are now able to freely shift stub_helper section in memory and in file since the VM addressing is now dynamically dependent on the positioning of `__stub_helper` preamble and other sections generated by the linker. --- src/link/MachO.zig | 347 ++++++++++++++++++++++----------------------- 1 file changed, 173 insertions(+), 174 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index fe7c7789bf..6785da065b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -177,8 +177,6 @@ has_stabs: bool = false, section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, -pending_updates: std.ArrayListUnmanaged(PendingUpdate) = .{}, - /// A list of text blocks that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added /// or removed from the freelist. @@ -384,6 +382,26 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio try ds.writeLocalSymbol(0); } + { + const atom = try self.createDyldPrivateAtom(); + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }; + const vaddr = try self.allocateAtom(atom, match); + try self.writeAtom(atom, match); + } + + { + const atom = try self.createStubHelperPreambleAtom(); + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + const vaddr = try self.allocateAtom(atom, match); + try self.writeAtom(atom, match); + } + return self; } @@ -760,50 +778,42 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.parseLibs(libs.items, self.base.options.sysroot); try self.resolveSymbols(); try self.resolveDyldStubBinder(); - try self.createDyldPrivateAtom(); - try self.createStubHelperPreambleAtom(); - - if (!use_stage1) { - // TODO just a temp - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.stub_helper_section_index.?]; - self.stub_helper_stubs_start_off = sect.offset + switch (self.base.options.target.cpu.arch) { - .x86_64 => @intCast(u64, 15), - .aarch64 => @intCast(u64, 6 * @sizeOf(u32)), - else => unreachable, - }; - } - - // Apply pending updates - var still_pending = std.ArrayList(PendingUpdate).init(self.base.allocator); - defer still_pending.deinit(); - - for (self.pending_updates.items) |update| { - switch (update) { - .add_stub_entry => |stub_index| { - try self.writeStub(stub_index); - try self.writeStubInStubHelper(stub_index); - try self.writeLazySymbolPointer(stub_index); - self.rebase_info_dirty = true; - self.lazy_binding_info_dirty = true; - }, - else => unreachable, - } - } - - self.pending_updates.clearRetainingCapacity(); - for (still_pending.items) |update| { - self.pending_updates.appendAssumeCapacity(update); - } - - try self.parseTextBlocks(); try self.addRpathLCs(rpath_table.keys()); try self.addLoadDylibLCs(); try self.addDataInCodeLC(); try self.addCodeSignatureLC(); if (use_stage1) { + try self.parseTextBlocks(); try self.sortSections(); + { + const atom = try self.createDyldPrivateAtom(); + try self.allocateAtomStage1(atom, .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }); + } + { + const atom = try self.createStubHelperPreambleAtom(); + 
try self.allocateAtomStage1(atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }); + + // TODO this is just a temp + // We already prealloc stub helper size in populateMissingMetadata(), but + // perhaps it's not needed after all? + const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[self.stub_helper_section_index.?]; + sect.size -= atom.size; + } + for (self.stubs.items) |_| { + const atom = try self.createStubHelperAtom(); + try self.allocateAtomStage1(atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }); + } try self.allocateTextSegment(); try self.allocateDataConstSegment(); try self.allocateDataSegment(); @@ -1705,18 +1715,10 @@ fn allocateTextSegment(self: *MachO) !void { seg.inner.fileoff = 0; seg.inner.vmaddr = base_vmaddr; - // Set stubs and stub_helper sizes + // Set stubs sizes const stubs = &seg.sections.items[self.stubs_section_index.?]; - const stub_helper = &seg.sections.items[self.stub_helper_section_index.?]; stubs.size += nstubs * stubs.reserved2; - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - stub_helper.size += nstubs * stub_size; - var sizeofcmds: u64 = 0; for (self.load_commands.items) |lc| { sizeofcmds += lc.cmdsize(); @@ -1947,41 +1949,81 @@ fn createEmptyAtom( defer self.base.allocator.free(code); mem.set(u8, code, 0); - const block = try self.base.allocator.create(TextBlock); - errdefer self.base.allocator.destroy(block); - block.* = TextBlock.empty; - block.local_sym_index = local_sym_index; - block.size = size; - block.alignment = alignment; - try block.code.appendSlice(self.base.allocator, code); + const atom = try self.base.allocator.create(TextBlock); + errdefer self.base.allocator.destroy(atom); + atom.* = TextBlock.empty; + atom.local_sym_index = local_sym_index; + atom.size = size; + atom.alignment = alignment; + try atom.code.appendSlice(self.base.allocator, code); + try self.managed_blocks.append(self.base.allocator, atom); - try self.managed_blocks.append(self.base.allocator, block); + return atom; +} +fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { + // TODO converge with `allocateTextBlock` + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const base_addr = if (atom.prev) |prev| blk: { + const prev_atom_sym = self.locals.items[prev.local_sym_index]; + break :blk prev_atom_sym.n_value; + } else sect.addr; + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); + + // TODO we should check if we need to expand the section or not like we + // do in `allocateTextBlock`. + if (self.blocks.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } else { + try self.blocks.putNoClobber(self.base.allocator, match, atom); + } + + return vaddr; +} + +fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + + const vaddr = try self.allocateAtom(atom, match); + const sym = &self.locals.items[atom.local_sym_index]; + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + + try atom.resolveRelocs(self); + + const file_offset = sect.offset + vaddr - sect.addr; + log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); + try self.base.file.?.pwriteAll(atom.code.items, file_offset); + try self.writeLocalSymbol(atom.local_sym_index); +} + +fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { // Update target section's metadata // TODO should we update segment's size here too? // How does it tie with incremental space allocs? const tseg = &self.load_commands.items[match.seg].Segment; const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", alignment); + const new_alignment = math.max(tsect.@"align", atom.alignment); const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + size; + const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + atom.size; tsect.size = new_size; tsect.@"align" = new_alignment; if (self.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; + last.*.next = atom; + atom.prev = last.*; + last.* = atom; } else { - try self.blocks.putNoClobber(self.base.allocator, match, block); + try self.blocks.putNoClobber(self.base.allocator, match, atom); } - - return block; } -fn createDyldPrivateAtom(self: *MachO) !void { - if (self.dyld_private_sym_index != null) return; - +fn createDyldPrivateAtom(self: *MachO) !*TextBlock { const match = MatchingSection{ .seg = self.data_segment_cmd_index.?, .sect = self.data_section_index.?, @@ -1994,35 +2036,11 @@ fn createDyldPrivateAtom(self: *MachO) !void { .n_desc = 0, .n_value = 0, }); - const last = self.blocks.get(match); - const atom = try self.createEmptyAtom(match, local_sym_index, @sizeOf(u64), 3); - - if (!(build_options.is_stage1 and self.base.options.use_stage1)) { - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const base_addr = if (last) |last_atom| blk: { - const last_atom_sym = self.locals.items[last_atom.local_sym_index]; - break :blk last_atom_sym.n_value; - } else sect.addr; - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); - - const sym = &self.locals.items[local_sym_index]; - sym.n_value = vaddr; - sym.n_sect = n_sect; - - const file_offset = sect.offset + vaddr - sect.addr; - log.debug("writing code for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); - try self.base.file.?.pwriteAll(atom.code.items, file_offset); - try self.writeLocalSymbol(local_sym_index); - } - self.dyld_private_sym_index = local_sym_index; + return self.createEmptyAtom(match, local_sym_index, @sizeOf(u64), 3); } -fn createStubHelperPreambleAtom(self: *MachO) !void { - if (self.stub_preamble_sym_index != null) return; +fn createStubHelperPreambleAtom(self: *MachO) !*TextBlock { const arch = self.base.options.target.cpu.arch; const match = MatchingSection{ .seg = self.text_segment_cmd_index.?, @@ -2046,7 +2064,6 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { .n_desc = 0, .n_value = 0, }); - const last = self.blocks.get(match); const atom = try self.createEmptyAtom(match, local_sym_index, size, alignment); switch (arch) { .x86_64 => { @@ -2157,36 +2174,71 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { else => unreachable, } self.stub_preamble_sym_index = local_sym_index; + return atom; +} - if (!(build_options.is_stage1 and self.base.options.use_stage1)) { - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const base_addr = if (last) |last_atom| blk: { - const last_atom_sym = self.locals.items[last_atom.local_sym_index]; - break :blk last_atom_sym.n_value; - } else sect.addr; - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); +fn createStubHelperAtom(self: *MachO) !*TextBlock { + const arch = self.base.options.target.cpu.arch; + const stub_size: u4 = switch (arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("stub_in_stub_helper"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }, local_sym_index, stub_size, 2); + try atom.relocs.ensureTotalCapacity(self.base.allocator, 1); - const sym = &self.locals.items[local_sym_index]; - sym.n_value = vaddr; - sym.n_sect = n_sect; - - try atom.resolveRelocs(self); - - const file_offset = sect.offset + vaddr - sect.addr; - log.debug("writing code for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); - try self.base.file.?.pwriteAll(atom.code.items, file_offset); - try self.writeLocalSymbol(local_sym_index); + switch (arch) { + .x86_64 => { + // pushq + atom.code.items[0] = 0x68; + // Next 4 bytes 1..4 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. 
+ // jmpq + atom.code.items[5] = 0xe9; + atom.relocs.appendAssumeCapacity(.{ + .offset = 6, + .where = .local, + .where_index = self.stub_preamble_sym_index.?, + .payload = .{ + .branch = .{ .arch = arch }, + }, + }); + }, + .aarch64 => { + const literal = blk: { + const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); + break :blk try math.cast(u18, div_res); + }; + // ldr w16, literal + mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.ldr(.w16, .{ + .literal = literal, + }).toU32()); + // b disp + mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.b(0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + .where_index = self.stub_preamble_sym_index.?, + .payload = .{ + .branch = .{ .arch = arch }, + }, + }); + // Next 4 bytes 8..12 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. + }, + else => unreachable, } - // TODO this needs to be fixed - // We already prealloc stub helper size in populateMissingMetadata(), but - // perhaps it's not needed after all? - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[self.stub_helper_section_index.?]; - sect.size -= size; + return atom; } fn resolveSymbolsInObject( @@ -2681,7 +2733,6 @@ fn flushZld(self: *MachO) !void { // TODO weak bound pointers try self.writeLazySymbolPointer(index); try self.writeStub(index); - try self.writeStubInStubHelper(index); } if (self.common_section_index) |index| { @@ -3173,7 +3224,6 @@ pub fn deinit(self: *MachO) void { } self.section_ordinals.deinit(self.base.allocator); - self.pending_updates.deinit(self.base.allocator); self.got_entries.deinit(self.base.allocator); self.got_entries_map.deinit(self.base.allocator); self.got_entries_free_list.deinit(self.base.allocator); @@ -4404,11 +4454,7 @@ pub fn addExternFn(self: *MachO, name: []const u8) !u32 { try self.stubs.append(self.base.allocator, sym_index); try self.stubs_map.putNoClobber(self.base.allocator, sym_index, stubs_index); - // TODO discuss this. The caller context expects codegen.InnerError{ OutOfMemory, CodegenFail }, - // which obviously doesn't include file writing op errors. So instead of trying to write the stub - // entry right here and now, queue it up and dispose of when updating decl. - try self.pending_updates.ensureUnusedCapacity(self.base.allocator, 1); - self.pending_updates.appendAssumeCapacity(.{ .add_stub_entry = stubs_index }); + // TODO create and write stub, stub_helper and lazy_ptr atoms return sym_index; } @@ -4683,53 +4729,6 @@ fn writeStub(self: *MachO, index: u32) !void { try self.base.file.?.pwriteAll(code, stub_off); } -fn writeStubInStubHelper(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? 
+ index * stub_size; - - var code = try self.base.allocator.alloc(u8, stub_size); - defer self.base.allocator.free(code); - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - stub_size, - ); - // pushq - code[0] = 0x68; - mem.writeIntLittle(u32, code[1..][0..4], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - // jmpq - code[5] = 0xe9; - mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement)); - }, - .aarch64 => { - const literal = blk: { - const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); - break :blk try math.cast(u18, div_res); - }; - // ldr w16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{ - .literal = literal, - }).toU32()); - const displacement = try math.cast(i28, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4); - // b disp - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(displacement).toU32()); - // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - mem.writeIntLittle(u32, code[8..12], 0x0); - }, - else => unreachable, - } - try self.base.file.?.pwriteAll(code, stub_off); -} - fn relocateSymbolTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const nlocals = self.locals.items.len; From 9e7b2fb894cf4021ec188fbb64b11d750f4354fa Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 23 Aug 2021 23:01:14 +0200 Subject: [PATCH 18/78] macho: add routine for creating lazy pointer for stub --- src/link/MachO.zig | 67 +++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6785da065b..27e05d18ac 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -808,8 +808,14 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { sect.size -= atom.size; } for (self.stubs.items) |_| { - const atom = try self.createStubHelperAtom(); - try self.allocateAtomStage1(atom, .{ + const stub_atom = try self.createStubHelperAtom(); + try self.allocateAtomStage1(stub_atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }); + + const laptr_atom = try self.createLazyPointerAtom(stub_atom.local_sym_index); + try self.allocateAtomStage1(laptr_atom, .{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, }); @@ -1767,16 +1773,9 @@ fn allocateDataConstSegment(self: *MachO) !void { fn allocateDataSegment(self: *MachO) !void { const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; - - // Set la_symbol_ptr - const la_symbol_ptr = &seg.sections.items[self.la_symbol_ptr_section_index.?]; - la_symbol_ptr.size += nstubs * @sizeOf(u64); - try self.allocateSegment(self.data_segment_cmd_index.?, 0); } @@ -2241,6 +2240,35 @@ fn createStubHelperAtom(self: *MachO) !*TextBlock { return atom; } +fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32) !*TextBlock { + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try 
self.makeString("lazy_ptr"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(.{ + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, + }, local_sym_index, @sizeOf(u64), 3); + try atom.relocs.append(self.base.allocator, .{ + .offset = 0, + .where = .local, + .where_index = stub_sym_index, + .payload = .{ + .unsigned = .{ + .subtractor = null, + .addend = 0, + .is_64bit = true, + }, + }, + }); + self.lazy_binding_info_dirty = true; + return atom; +} + fn resolveSymbolsInObject( self: *MachO, object_id: u16, @@ -2731,7 +2759,6 @@ fn flushZld(self: *MachO) !void { for (self.stubs.items) |_, i| { const index = @intCast(u32, i); // TODO weak bound pointers - try self.writeLazySymbolPointer(index); try self.writeStub(index); } @@ -4633,26 +4660,6 @@ fn writeGotEntry(self: *MachO, index: usize) !void { try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); } -fn writeLazySymbolPointer(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size; - const end = stub_helper.addr + stub_off - stub_helper.offset; - var buf: [@sizeOf(u64)]u8 = undefined; - mem.writeIntLittle(u64, &buf, end); - const off = la_symbol_ptr.offset + index * @sizeOf(u64); - log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ end, off }); - try self.base.file.?.pwriteAll(&buf, off); -} - fn writeStub(self: *MachO, index: u32) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const stubs = text_segment.sections.items[self.stubs_section_index.?]; From 876071b50b1d23a59b3d3e5acedf4161029f5f2b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 23 Aug 2021 23:23:00 +0200 Subject: [PATCH 19/78] macho: fix writing stubs (on x86_64 only) and lazy ptrs --- src/link/MachO.zig | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 27e05d18ac..e3b6fcf158 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -816,8 +816,8 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { const laptr_atom = try self.createLazyPointerAtom(stub_atom.local_sym_index); try self.allocateAtomStage1(laptr_atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, }); } try self.allocateTextSegment(); @@ -2183,6 +2183,11 @@ fn createStubHelperAtom(self: *MachO) !*TextBlock { .aarch64 => 3 * @sizeOf(u32), else => unreachable, }; + const alignment: u2 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }; const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ .n_strx = try self.makeString("stub_in_stub_helper"), @@ -2194,7 +2199,7 @@ fn createStubHelperAtom(self: *MachO) !*TextBlock { const atom = try self.createEmptyAtom(.{ .seg = self.text_segment_cmd_index.?, .sect = 
self.stub_helper_section_index.?, - }, local_sym_index, stub_size, 2); + }, local_sym_index, stub_size, alignment); try atom.relocs.ensureTotalCapacity(self.base.allocator, 1); switch (arch) { From 91c0552cfcb727dd6c2e6aa402112145993fac5b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 24 Aug 2021 00:03:51 +0200 Subject: [PATCH 20/78] macho: add routine for creating stubs in __stubs section --- src/link/MachO.zig | 180 ++++++++++++++++++++++----------------------- 1 file changed, 89 insertions(+), 91 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e3b6fcf158..1d45157f0a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -808,17 +808,23 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { sect.size -= atom.size; } for (self.stubs.items) |_| { - const stub_atom = try self.createStubHelperAtom(); - try self.allocateAtomStage1(stub_atom, .{ + const stub_helper_atom = try self.createStubHelperAtom(); + try self.allocateAtomStage1(stub_helper_atom, .{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, }); - const laptr_atom = try self.createLazyPointerAtom(stub_atom.local_sym_index); + const laptr_atom = try self.createLazyPointerAtom(stub_helper_atom.local_sym_index); try self.allocateAtomStage1(laptr_atom, .{ .seg = self.data_segment_cmd_index.?, .sect = self.la_symbol_ptr_section_index.?, }); + + const stub_atom = try self.createStubAtom(laptr_atom.local_sym_index); + try self.allocateAtomStage1(stub_atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stubs_section_index.?, + }); } try self.allocateTextSegment(); try self.allocateDataConstSegment(); @@ -1715,16 +1721,10 @@ fn sortSections(self: *MachO) !void { fn allocateTextSegment(self: *MachO) !void { const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const nstubs = @intCast(u32, self.stubs.items.len); - const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; seg.inner.fileoff = 0; seg.inner.vmaddr = base_vmaddr; - // Set stubs sizes - const stubs = &seg.sections.items[self.stubs_section_index.?]; - stubs.size += nstubs * stubs.reserved2; - var sizeofcmds: u64 = 0; for (self.load_commands.items) |lc| { sizeofcmds += lc.cmdsize(); @@ -2274,6 +2274,86 @@ fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32) !*TextBlock { return atom; } +fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*TextBlock { + const arch = self.base.options.target.cpu.arch; + const alignment: u2 = switch (arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_size: u4 = switch (arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("stub"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stubs_section_index.?, + }, local_sym_index, stub_size, alignment); + switch (arch) { + .x86_64 => { + // jmp + atom.code.items[0] = 0xff; + atom.code.items[1] = 0x25; + try atom.relocs.append(self.base.allocator, .{ + .offset = 2, + .where = .local, + .where_index = laptr_sym_index, + .payload = .{ + .branch = .{ .arch = arch }, + }, + }); + }, + .aarch64 => { + try atom.relocs.ensureTotalCapacity(self.base.allocator, 2); + // adrp 
x16, pages + mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 0, + .where = .local, + .where_index = laptr_sym_index, + .payload = .{ + .page = .{ + .kind = .page, + .addend = 0, + }, + }, + }); + // ldr x16, x16, offset + mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.ldr(.x16, .{ + .register = .{ + .rn = .x16, + .offset = aarch64.Instruction.LoadStoreOffset.imm(0), + }, + }).toU32()); + atom.relocs.appendAssumeCapacity(.{ + .offset = 4, + .where = .local, + .where_index = laptr_sym_index, + .payload = .{ + .page_off = .{ + .kind = .page, + .addend = 0, + .op_kind = .load, + }, + }, + }); + // br x16 + mem.writeIntLittle(u32, atom.code.items[8..12], aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, + } + return atom; +} + fn resolveSymbolsInObject( self: *MachO, object_id: u16, @@ -2761,12 +2841,6 @@ fn addLoadDylibLCs(self: *MachO) !void { fn flushZld(self: *MachO) !void { try self.writeTextBlocks(); - for (self.stubs.items) |_, i| { - const index = @intCast(u32, i); - // TODO weak bound pointers - try self.writeStub(index); - } - if (self.common_section_index) |index| { const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; const sect = &seg.sections.items[index]; @@ -4665,82 +4739,6 @@ fn writeGotEntry(self: *MachO, index: usize) !void { try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); } -fn writeStub(self: *MachO, index: u32) !void { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = text_segment.sections.items[self.stubs_section_index.?]; - const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const stub_off = stubs.offset + index * stubs.reserved2; - const stub_addr = stubs.addr + index * stubs.reserved2; - const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64); - - log.debug("writing stub at 0x{x}", .{stub_off}); - - var code = try self.base.allocator.alloc(u8, stubs.reserved2); - defer self.base.allocator.free(code); - - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - assert(la_ptr_addr >= stub_addr + stubs.reserved2); - const displacement = try math.cast(u32, la_ptr_addr - stub_addr - stubs.reserved2); - // jmp - code[0] = 0xff; - code[1] = 0x25; - mem.writeIntLittle(u32, code[2..][0..4], displacement); - }, - .aarch64 => { - assert(la_ptr_addr >= stub_addr); - outer: { - const this_addr = stub_addr; - const target_addr = la_ptr_addr; - inner: { - const displacement = math.divExact(u64, target_addr - this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // ldr x16, literal - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - // nop - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.nop().toU32()); - break :outer; - } - inner: { - const new_this_addr = this_addr + @sizeOf(u32); - const displacement = math.divExact(u64, target_addr - new_this_addr, 4) catch break :inner; - const literal = math.cast(u18, displacement) catch break :inner; - // nop - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.nop().toU32()); - // ldr x16, literal - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .literal = literal, - }).toU32()); - break :outer; - } - // Use adrp followed by ldr(register). 
- const this_page = @intCast(i32, this_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @intCast(i21, target_page - this_page); - // adrp x16, pages - mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, pages).toU32()); - const narrowed = @truncate(u12, target_addr); - const offset = try math.divExact(u12, narrowed, 8); - // ldr x16, x16, offset - mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(.x16, .{ - .register = .{ - .rn = .x16, - .offset = aarch64.Instruction.LoadStoreOffset.imm(offset), - }, - }).toU32()); - } - // br x16 - mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); - }, - else => unreachable, - } - try self.base.file.?.pwriteAll(code, stub_off); -} - fn relocateSymbolTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const nlocals = self.locals.items.len; From 8d300927045f3f2be3cc2eb6c665a7b17d81a655 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 24 Aug 2021 13:16:43 +0200 Subject: [PATCH 21/78] macho: port stub and lazy ptr atoms to stage2 --- src/link/MachO.zig | 200 ++++++++++++++++++++++++++------------------- 1 file changed, 115 insertions(+), 85 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1d45157f0a..f490583ed8 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -138,7 +138,11 @@ locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, -unresolved: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, +unresolved: std.AutoArrayHashMapUnmanaged(u32, enum { + none, + stub, + got, +}) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, globals_free_list: std.ArrayListUnmanaged(u32) = .{}, @@ -147,8 +151,6 @@ dyld_private_sym_index: ?u32 = null, dyld_stub_binder_index: ?u32 = null, stub_preamble_sym_index: ?u32 = null, -stub_helper_stubs_start_off: ?u64 = null, - strtab: std.ArrayListUnmanaged(u8) = .{}, strtab_dir: std.HashMapUnmanaged(u32, u32, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, @@ -382,26 +384,6 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio try ds.writeLocalSymbol(0); } - { - const atom = try self.createDyldPrivateAtom(); - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; - const vaddr = try self.allocateAtom(atom, match); - try self.writeAtom(atom, match); - } - - { - const atom = try self.createStubHelperPreambleAtom(); - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }; - const vaddr = try self.allocateAtom(atom, match); - try self.writeAtom(atom, match); - } - return self; } @@ -776,8 +758,8 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.parseInputFiles(positionals.items, self.base.options.sysroot); try self.parseLibs(libs.items, self.base.options.sysroot); + try self.resolveSymbols(); - try self.resolveDyldStubBinder(); try self.addRpathLCs(rpath_table.keys()); try self.addLoadDylibLCs(); try self.addDataInCodeLC(); @@ -799,7 +781,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, }); - // TODO this is just a temp // We already prealloc stub helper size in populateMissingMetadata(), but // perhaps it's 
not needed after all? @@ -807,6 +788,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { const sect = &seg.sections.items[self.stub_helper_section_index.?]; sect.size -= atom.size; } + for (self.stubs.items) |_| { const stub_helper_atom = try self.createStubHelperAtom(); try self.allocateAtomStage1(stub_helper_atom, .{ @@ -826,21 +808,12 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { .sect = self.stubs_section_index.?, }); } + try self.allocateTextSegment(); try self.allocateDataConstSegment(); try self.allocateDataSegment(); self.allocateLinkeditSegment(); try self.allocateTextBlocks(); - { - // TODO just a temp - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.stub_helper_section_index.?]; - self.stub_helper_stubs_start_off = sect.offset + switch (self.base.options.target.cpu.arch) { - .x86_64 => @intCast(u64, 15), - .aarch64 => @intCast(u64, 6 * @sizeOf(u32)), - else => unreachable, - }; - } try self.flushZld(); } else { try self.flushModule(comp); @@ -1937,13 +1910,7 @@ fn writeTextBlocks(self: *MachO) !void { } } -fn createEmptyAtom( - self: *MachO, - match: MatchingSection, - local_sym_index: u32, - size: u64, - alignment: u32, -) !*TextBlock { +fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*TextBlock { const code = try self.base.allocator.alloc(u8, size); defer self.base.allocator.free(code); mem.set(u8, code, 0); @@ -1964,15 +1931,20 @@ fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { // TODO converge with `allocateTextBlock` const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; - const base_addr = if (atom.prev) |prev| blk: { - const prev_atom_sym = self.locals.items[prev.local_sym_index]; - break :blk prev_atom_sym.n_value; + const sym = &self.locals.items[atom.local_sym_index]; + const base_addr = if (self.blocks.get(match)) |last| blk: { + const last_atom_sym = self.locals.items[last.local_sym_index]; + break :blk last_atom_sym.n_value + last.size; } else sect.addr; const atom_alignment = try math.powi(u32, 2, atom.alignment); const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); + log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); // TODO we should check if we need to expand the section or not like we // do in `allocateTextBlock`. + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + if (self.blocks.getPtr(match)) |last| { last.*.next = atom; atom.prev = last.*; @@ -1987,15 +1959,9 @@ fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; - - const vaddr = try self.allocateAtom(atom, match); - const sym = &self.locals.items[atom.local_sym_index]; - sym.n_value = vaddr; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - + const sym = self.locals.items[atom.local_sym_index]; + const file_offset = sect.offset + sym.n_value - sect.addr; try atom.resolveRelocs(self); - - const file_offset = sect.offset + vaddr - sect.addr; log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); try self.writeLocalSymbol(atom.local_sym_index); @@ -2023,10 +1989,6 @@ fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !v } fn createDyldPrivateAtom(self: *MachO) !*TextBlock { - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ .n_strx = try self.makeString("dyld_private"), @@ -2036,15 +1998,11 @@ fn createDyldPrivateAtom(self: *MachO) !*TextBlock { .n_value = 0, }); self.dyld_private_sym_index = local_sym_index; - return self.createEmptyAtom(match, local_sym_index, @sizeOf(u64), 3); + return self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); } fn createStubHelperPreambleAtom(self: *MachO) !*TextBlock { const arch = self.base.options.target.cpu.arch; - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }; const size: u64 = switch (arch) { .x86_64 => 15, .aarch64 => 6 * @sizeOf(u32), @@ -2063,7 +2021,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !*TextBlock { .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(match, local_sym_index, size, alignment); + const atom = try self.createEmptyAtom(local_sym_index, size, alignment); switch (arch) { .x86_64 => { try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); @@ -2196,10 +2154,7 @@ fn createStubHelperAtom(self: *MachO) !*TextBlock { .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(.{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }, local_sym_index, stub_size, alignment); + const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); try atom.relocs.ensureTotalCapacity(self.base.allocator, 1); switch (arch) { @@ -2254,10 +2209,7 @@ fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32) !*TextBlock { .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(.{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }, local_sym_index, @sizeOf(u64), 3); + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); try atom.relocs.append(self.base.allocator, .{ .offset = 0, .where = .local, @@ -2294,10 +2246,7 @@ fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*TextBlock { .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(.{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }, local_sym_index, stub_size, alignment); + const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); switch (arch) { .x86_64 => { // jmp @@ -2547,12 +2496,14 @@ fn resolveSymbolsInObject( .where_index = undef_sym_index, .file = object_id, }); - _ = try self.unresolved.getOrPut(self.base.allocator, undef_sym_index); + try self.unresolved.putNoClobber(self.base.allocator, undef_sym_index, .none); } } } fn resolveSymbols(self: *MachO) !void { + const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + var tentatives = std.AutoArrayHashMap(u32, 
void).init(self.base.allocator); defer tentatives.deinit(); @@ -2620,7 +2571,32 @@ fn resolveSymbols(self: *MachO) !void { const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; resolv.local_sym_index = local_sym_index; - _ = try self.createEmptyAtom(match, local_sym_index, size, alignment); + const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + if (use_stage1) { + try self.allocateAtomStage1(atom, match); + } + } + + try self.resolveDyldStubBinder(); + if (!use_stage1) { + { + const atom = try self.createDyldPrivateAtom(); + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }; + _ = try self.allocateAtom(atom, match); + try self.writeAtom(atom, match); + } + { + const atom = try self.createStubHelperPreambleAtom(); + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + _ = try self.allocateAtom(atom, match); + try self.writeAtom(atom, match); + } } // Third pass, resolve symbols in dynamic libraries. @@ -2643,7 +2619,43 @@ fn resolveSymbols(self: *MachO) !void { undef.n_type |= macho.N_EXT; undef.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; - _ = self.unresolved.fetchSwapRemove(resolv.where_index); + if (self.unresolved.fetchSwapRemove(resolv.where_index)) |entry| { + switch (entry.value) { + .none => {}, + .got => return error.TODOGotHint, + .stub => { + const stub_helper_atom = blk: { + const atom = try self.createStubHelperAtom(); + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + _ = try self.allocateAtom(atom, match); + try self.writeAtom(atom, match); + break :blk atom; + }; + const laptr_atom = blk: { + const atom = try self.createLazyPointerAtom(stub_helper_atom.local_sym_index); + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, + }; + _ = try self.allocateAtom(atom, match); + try self.writeAtom(atom, match); + break :blk atom; + }; + { + const atom = try self.createStubAtom(laptr_atom.local_sym_index); + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stubs_section_index.?, + }; + _ = try self.allocateAtom(atom, match); + try self.writeAtom(atom, match); + } + }, + } + } continue :loop; } @@ -2695,7 +2707,10 @@ fn resolveSymbols(self: *MachO) !void { // We create an empty atom for this symbol. // TODO perhaps we should special-case special symbols? Create a separate // linked list of atoms? 
- _ = try self.createEmptyAtom(match, local_sym_index, 0, 0); + const atom = try self.createEmptyAtom(local_sym_index, 0, 0); + if (use_stage1) { + try self.allocateAtomStage1(atom, match); + } } for (self.unresolved.keys()) |index| { @@ -4554,14 +4569,12 @@ pub fn addExternFn(self: *MachO, name: []const u8) !u32 { .where = .undef, .where_index = sym_index, }); - _ = try self.unresolved.getOrPut(self.base.allocator, sym_index); + try self.unresolved.putNoClobber(self.base.allocator, sym_index, .stub); const stubs_index = @intCast(u32, self.stubs.items.len); try self.stubs.append(self.base.allocator, sym_index); try self.stubs_map.putNoClobber(self.base.allocator, sym_index, stubs_index); - // TODO create and write stub, stub_helper and lazy_ptr atoms - return sym_index; } @@ -5271,7 +5284,11 @@ fn writeLazyBindInfoTable(self: *MachO) !void { } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { - if (self.stubs.items.len == 0) return; + const last_atom = self.blocks.get(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }) orelse return; + if (last_atom.local_sym_index == self.stub_preamble_sym_index.?) return; var stream = std.io.fixedBufferStream(buffer); var reader = stream.reader(); @@ -5316,7 +5333,6 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { else => {}, } } - assert(self.stubs.items.len <= offsets.items.len); const stub_size: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 10, .aarch64 => 3 * @sizeOf(u32), else => unreachable, }; const off: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), else => unreachable, }; var buf: [@sizeOf(u32)]u8 = undefined; + + var first_atom = last_atom; + while (first_atom.prev) |prev| { + first_atom = prev; + } + + const start_off = blk: { + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.stub_helper_section_index.?]; + const sym = self.locals.items[first_atom.next.?.local_sym_index]; + break :blk sym.n_value - sect.addr + sect.offset; + }; + log.warn("start_off = 0x{x}", .{start_off}); + for (self.stubs.items) |_, index| { - const placeholder_off = self.stub_helper_stubs_start_off.? + index * stub_size + off; + const placeholder_off = start_off + index * stub_size + off; mem.writeIntLittle(u32, &buf, offsets.items[index]); try self.base.file.?.pwriteAll(&buf, placeholder_off); } From 32ce8238a8f722a17a408bf9d15db4a74662ae71 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 24 Aug 2021 15:19:28 +0200 Subject: [PATCH 22/78] macho: rewrite populateLazyBindOffsetsInStubHelper to use atoms Instead of referencing stub indices, obtain the lazy bind offsets in a more generic fashion from the actual linked list of atoms in the __stub_helper section.
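In essence, the rewritten routine walks the __stub_helper atoms backwards from the tail of the section's linked list while popping the parsed lazy bind offsets off the end of their list, so both sequences run in reverse together. A simplified sketch of that loop, with names (last_atom, offsets, sect, stub_offset, buf) as in the diff below:

    var atom = last_atom;
    _ = offsets.pop(); // drop the extra topmost offset (see the diff below) before pairing
    while (offsets.popOrNull()) |bind_offset| {
        // The atom's local symbol records its address, so the file offset of its
        // bind-offset placeholder follows directly from the section layout.
        const sym = self.locals.items[atom.local_sym_index];
        const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset;
        mem.writeIntLittle(u32, &buf, bind_offset);
        try self.base.file.?.pwriteAll(&buf, file_offset);
        atom = atom.prev orelse break; // stop at the first atom in the section
    }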
--- src/link/MachO.zig | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f490583ed8..f3304c9e47 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -766,8 +766,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.addCodeSignatureLC(); if (use_stage1) { - try self.parseTextBlocks(); - try self.sortSections(); { const atom = try self.createDyldPrivateAtom(); try self.allocateAtomStage1(atom, .{ @@ -789,6 +787,9 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { sect.size -= atom.size; } + try self.parseTextBlocks(); + try self.sortSections(); + for (self.stubs.items) |_| { const stub_helper_atom = try self.createStubHelperAtom(); try self.allocateAtomStage1(stub_helper_atom, .{ @@ -5334,35 +5335,30 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } - const stub_size: u4 = switch (self.base.options.target.cpu.arch) { - .x86_64 => 10, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, - }; - const off: u4 = switch (self.base.options.target.cpu.arch) { + const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.stub_helper_section_index.?]; + const stub_offset: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), else => unreachable, }; var buf: [@sizeOf(u32)]u8 = undefined; + var atom = last_atom; + _ = offsets.pop(); + while (offsets.popOrNull()) |bind_offset| { + const sym = self.locals.items[atom.local_sym_index]; + const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset; + mem.writeIntLittle(u32, &buf, bind_offset); + log.debug("writing lazy binding offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ + bind_offset, + self.getString(sym.n_strx), + file_offset, + }); + try self.base.file.?.pwriteAll(&buf, file_offset); - var first_atom = last_atom; - while (first_atom.prev) |prev| { - first_atom = prev; - } - - const start_off = blk: { - const seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.stub_helper_section_index.?]; - const sym = self.locals.items[first_atom.next.?.local_sym_index]; - break :blk sym.n_value - sect.addr + sect.offset; - }; - log.warn("start_off = 0x{x}", .{start_off}); - - for (self.stubs.items) |_, index| { - const placeholder_off = start_off + index * stub_size + off; - mem.writeIntLittle(u32, &buf, offsets.items[index]); - try self.base.file.?.pwriteAll(&buf, placeholder_off); + if (atom.prev) |prev| { + atom = prev; + } else break; } } From aee6f14bcee7c96fcb65490cad96e06991caece7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 24 Aug 2021 20:16:34 +0200 Subject: [PATCH 23/78] macho: use existing rebase mechanism to rebase lazy pointers --- src/link/MachO.zig | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f3304c9e47..b6b70060a9 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2223,6 +2223,7 @@ fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32) !*TextBlock { }, }, }); + try atom.rebases.append(self.base.allocator, 0); self.lazy_binding_info_dirty = true; return atom; } @@ -3002,21 +3003,6 @@ fn writeRebaseInfoTableZld(self: *MachO) !void { } } - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const 
sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - for (self.stubs.items) |_, i| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); const size = try bind.rebaseInfoSize(pointers.items); @@ -5104,21 +5090,6 @@ fn writeRebaseInfoTable(self: *MachO) !void { } } - if (self.la_symbol_ptr_section_index) |idx| { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - - try pointers.ensureUnusedCapacity(self.stubs.items.len); - for (self.stubs.items) |_, i| { - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); const size = try bind.rebaseInfoSize(pointers.items); From ea4bd2b87962794233df1693cdea3da266e27b86 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 25 Aug 2021 10:51:09 +0200 Subject: [PATCH 24/78] macho: add routine for creating Got atoms --- src/link/MachO.zig | 52 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b6b70060a9..ba21e2476f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1989,6 +1989,42 @@ fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !v } } +fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*TextBlock { + const local_sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(self.base.allocator, .{ + .n_strx = try self.makeString("got_entry"), + .n_type = macho.N_SECT, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + switch (key.where) { + .local => { + try atom.relocs.append(self.base.allocator, .{ + .offset = 0, + .where = .local, + .where_index = key.where_index, + .payload = .{ + .unsigned = .{ + .subtractor = null, + .addend = 0, + .is_64bit = true, + }, + }, + }); + try atom.rebases.append(self.base.allocator, 0); + }, + .undef => { + try atom.bindings.append(self.base.allocator, .{ + .local_sym_index = key.where_index, + .offset = 0, + }); + }, + } + return atom; +} + fn createDyldPrivateAtom(self: *MachO) !*TextBlock { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ @@ -2777,13 +2813,19 @@ fn resolveDyldStubBinder(self: *MachO) !void { try self.got_entries.append(self.base.allocator, got_entry); try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); - self.binding_info_dirty = true; - self.got_entries_count_dirty = true; - + const atom = try self.createGotAtom(got_entry); + const match = MatchingSection{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }; + // TODO remove once we can incrementally update in stage1 too. if (!(build_options.is_stage1 and self.base.options.use_stage1)) { - // TODO remove once we can incrementally update in stage1 too. 
- try self.writeGotEntry(got_index); + _ = try self.allocateAtom(atom, match); + try self.writeAtom(atom, match); + } else { + try self.allocateAtomStage1(atom, match); } + self.binding_info_dirty = true; } fn parseTextBlocks(self: *MachO) !void { From af57ccbe279d73f91358ec28fb4afd54868650e8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 25 Aug 2021 15:11:21 +0200 Subject: [PATCH 25/78] macho: generalise free list usage to all sections --- src/link/MachO.zig | 95 ++++++++++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 32 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ba21e2476f..b3077d0eb1 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -194,10 +194,10 @@ section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, /// overcapacity can be negative. A simple way to have negative overcapacity is to /// allocate a fresh text block, which will have ideal capacity, and then grow it /// by 1 byte. It will then have -1 overcapacity. -text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{}, +block_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*TextBlock)) = .{}, /// Pointer to the last allocated text block -last_text_block: ?*TextBlock = null, +blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, /// List of TextBlocks that are owned directly by the linker. /// Currently these are only TextBlocks that are the result of linking @@ -206,8 +206,6 @@ last_text_block: ?*TextBlock = null, /// TODO consolidate this. managed_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, -blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, - /// Table of Decls that are currently alive. /// We store them here so that we can properly dispose of any allocated /// memory within the TextBlock in the incremental linker. 
@@ -1535,6 +1533,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (res) |match| { _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + _ = try self.block_free_lists.getOrPutValue(self.base.allocator, match, .{}); } return res; @@ -3417,8 +3416,13 @@ pub fn deinit(self: *MachO) void { } self.managed_blocks.deinit(self.base.allocator); self.blocks.deinit(self.base.allocator); - self.text_block_free_list.deinit(self.base.allocator); - + { + var it = self.block_free_lists.valueIterator(); + while (it.next()) |free_list| { + free_list.deinit(self.base.allocator); + } + self.block_free_lists.deinit(self.base.allocator); + } for (self.decls.keys()) |decl| { decl.link.macho.deinit(self.base.allocator); } @@ -3441,16 +3445,21 @@ fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { log.debug("freeTextBlock {*}", .{text_block}); text_block.deinit(self.base.allocator); + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + const text_block_free_list = self.block_free_lists.getPtr(match).?; var already_have_free_list_node = false; { var i: usize = 0; // TODO turn text_block_free_list into a hash map - while (i < self.text_block_free_list.items.len) { - if (self.text_block_free_list.items[i] == text_block) { - _ = self.text_block_free_list.swapRemove(i); + while (i < text_block_free_list.items.len) { + if (text_block_free_list.items[i] == text_block) { + _ = text_block_free_list.swapRemove(i); continue; } - if (self.text_block_free_list.items[i] == text_block.prev) { + if (text_block_free_list.items[i] == text_block.prev) { already_have_free_list_node = true; } i += 1; @@ -3458,10 +3467,15 @@ fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { } // TODO process free list for dbg info just like we do above for vaddrs - if (self.last_text_block == text_block) { - // TODO shrink the __text section size here - self.last_text_block = text_block.prev; + if (self.blocks.getPtr(match)) |last_text_block| { + if (last_text_block.* == text_block) { + if (text_block.prev) |prev| { + // TODO shrink the __text section size here + last_text_block.* = prev; + } + } } + if (self.d_sym) |*ds| { if (ds.dbg_info_decl_first == text_block) { ds.dbg_info_decl_first = text_block.dbg_info_next; @@ -3478,7 +3492,7 @@ fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { if (!already_have_free_list_node and prev.freeListEligible(self.*)) { // The free list is heuristics, it doesn't have to be perfect, so we can ignore // the OOM here. 
- self.text_block_free_list.append(self.base.allocator, prev) catch {}; + text_block_free_list.append(self.base.allocator, prev) catch {}; } } else { text_block.prev = null; @@ -4035,10 +4049,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { .@"align" = alignment, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ + const match = MatchingSection{ .seg = self.text_segment_cmd_index.?, .sect = self.text_section_index.?, - }); + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); self.load_commands_dirty = true; } @@ -4070,10 +4086,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, .reserved2 = stub_size, }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ + const match = MatchingSection{ .seg = self.text_segment_cmd_index.?, .sect = self.stubs_section_index.?, - }); + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); self.load_commands_dirty = true; } @@ -4103,10 +4121,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { .@"align" = alignment, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ + const match = MatchingSection{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, - }); + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); self.load_commands_dirty = true; } @@ -4148,10 +4168,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { .@"align" = 3, // 2^3 = @sizeOf(u64) .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ + const match = MatchingSection{ .seg = self.data_const_segment_cmd_index.?, .sect = self.got_section_index.?, - }); + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); self.load_commands_dirty = true; } @@ -4193,10 +4215,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { .@"align" = 3, // 2^3 = @sizeOf(u64) .flags = macho.S_LAZY_SYMBOL_POINTERS, }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ + const match = MatchingSection{ .seg = self.data_segment_cmd_index.?, .sect = self.la_symbol_ptr_section_index.?, - }); + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); self.load_commands_dirty = true; } @@ -4216,10 +4240,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { .offset = @intCast(u32, off), .@"align" = 3, // 2^3 = @sizeOf(u64) }); - _ = try self.section_ordinals.getOrPut(self.base.allocator, .{ + const match = MatchingSection{ .seg = self.data_segment_cmd_index.?, .sect = self.data_section_index.?, - }); + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); self.load_commands_dirty = true; } @@ -4470,6 +4496,11 @@ pub fn populateMissingMetadata(self: *MachO) !void { fn allocateTextBlock(self: *MachO, text_block: 
*TextBlock, new_block_size: u64, alignment: u64) !u64 { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_section = &text_segment.sections.items[self.text_section_index.?]; + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + const text_block_free_list = self.block_free_lists.getPtr(match).?; const new_block_ideal_capacity = padToIdeal(new_block_size); // We use these to indicate our intention to update metadata, placing the new block, @@ -4484,8 +4515,8 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, // The list is unordered. We'll just take the first thing that works. const vaddr = blk: { var i: usize = 0; - while (i < self.text_block_free_list.items.len) { - const big_block = self.text_block_free_list.items[i]; + while (i < text_block_free_list.items.len) { + const big_block = text_block_free_list.items[i]; // We now have a pointer to a live text block that has too much capacity. // Is it enough that we could fit this new text block? const sym = self.locals.items[big_block.local_sym_index]; @@ -4500,7 +4531,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, // should be deleted because the block that it points to has grown to take up // more of the extra capacity. if (!big_block.freeListEligible(self.*)) { - const bl = self.text_block_free_list.swapRemove(i); + const bl = text_block_free_list.swapRemove(i); bl.deinit(self.base.allocator); } else { i += 1; @@ -4519,7 +4550,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, free_list_removal = i; } break :blk new_start_vaddr; - } else if (self.last_text_block) |last| { + } else if (self.blocks.get(match)) |last| { const last_symbol = self.locals.items[last.local_sym_index]; // TODO We should pad out the excess capacity with NOPs. For executables, // no padding seems to be OK, but it will probably not be for objects. @@ -4538,7 +4569,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const needed_size = (vaddr + new_block_size) - text_section.addr; assert(needed_size <= text_segment.inner.filesize); // TODO must move the entire text section. - self.last_text_block = text_block; + _ = try self.blocks.getOrPutValue(self.base.allocator, match, text_block); text_section.size = needed_size; self.load_commands_dirty = true; // TODO Make more granular. 
@@ -4567,7 +4598,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, text_block.next = null; } if (free_list_removal) |i| { - _ = self.text_block_free_list.swapRemove(i); + _ = text_block_free_list.swapRemove(i); } return vaddr; From ee786e5c3c1171af082f9463ca46b9ed08d2601e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 25 Aug 2021 17:23:30 +0200 Subject: [PATCH 26/78] macho: add GOT entries as actual atoms --- src/codegen.zig | 62 +++------- src/link/MachO.zig | 220 +++++++---------------------------- src/link/MachO/TextBlock.zig | 22 ++-- 3 files changed, 73 insertions(+), 231 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index d5b106dbe3..77055162b9 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2668,24 +2668,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (self.air.value(callee)) |func_value| { if (func_value.castTag(.function)) |func_payload| { const func = func_payload.data; - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - const got_index = macho_file.got_entries_map.get(.{ - .where = .local, - .where_index = func.owner_decl.link.macho.local_sym_index, - }) orelse unreachable; - break :blk got.addr + got_index * @sizeOf(u64); - }; + // TODO I'm hacking my way through here by repurposing .memory for storing + // index to the GOT target symbol index. switch (arch) { .x86_64 => { - try self.genSetReg(Type.initTag(.u64), .rax, .{ .memory = got_addr }); + try self.genSetReg(Type.initTag(.u64), .rax, .{ + .memory = func.owner_decl.link.macho.local_sym_index, + }); // callq *%rax try self.code.ensureCapacity(self.code.items.len + 2); self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 }); }, .aarch64 => { - try self.genSetReg(Type.initTag(.u64), .x30, .{ .memory = got_addr }); + try self.genSetReg(Type.initTag(.u64), .x30, .{ + .memory = func.owner_decl.link.macho.local_sym_index, + }); // blr x30 writeInt(u32, try self.code.addManyAsArray(4), Instruction.blr(.x30).toU32()); }, @@ -4206,29 +4203,20 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }).toU32()); if (self.bin_file.cast(link.File.MachO)) |macho_file| { - // TODO this is super awkward. We are reversing the address of the GOT entry here. - // We should probably have it cached or move the reloc adding somewhere else. - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr; - }; - const where_index = blk: for (macho_file.got_entries.items) |key, id| { - if (got_addr + id * @sizeOf(u64) == addr) break :blk key.where_index; - } else unreachable; + // TODO I think the reloc might be in the wrong place. const decl = macho_file.active_decl.?; // Page reloc for adrp instruction. try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset, .where = .local, - .where_index = where_index, + .where_index = @intCast(u32, addr), .payload = .{ .page = .{ .kind = .got } }, }); // Pageoff reloc for adrp instruction. 
try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset + 4, .where = .local, - .where_index = where_index, + .where_index = @intCast(u32, addr), .payload = .{ .page_off = .{ .kind = .got } }, }); } else { @@ -4489,22 +4477,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const offset = @intCast(u32, self.code.items.len); if (self.bin_file.cast(link.File.MachO)) |macho_file| { - // TODO this is super awkward. We are reversing the address of the GOT entry here. - // We should probably have it cached or move the reloc adding somewhere else. - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - break :blk got.addr; - }; - const where_index = blk: for (macho_file.got_entries.items) |key, id| { - if (got_addr + id * @sizeOf(u64) == x) break :blk key.where_index; - } else unreachable; + // TODO I think the reloc might be in the wrong place. const decl = macho_file.active_decl.?; // Load reloc for LEA instruction. try decl.link.macho.relocs.append(self.bin_file.allocator, .{ .offset = offset - 4, .where = .local, - .where_index = where_index, + .where_index = @intCast(u32, x), .payload = .{ .load = .{ .kind = .got } }, }); } else { @@ -4720,17 +4699,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?]; const got_addr = got.p_vaddr + decl.link.elf.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; - } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const got_addr = blk: { - const seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = seg.sections.items[macho_file.got_section_index.?]; - const got_index = macho_file.got_entries_map.get(.{ - .where = .local, - .where_index = decl.link.macho.local_sym_index, - }) orelse unreachable; - break :blk got.addr + got_index * ptr_bytes; - }; - return MCValue{ .memory = got_addr }; + } else if (self.bin_file.cast(link.File.MachO)) |_| { + // TODO I'm hacking my way through here by repurposing .memory for storing + // index to the GOT target symbol index. 
+ return MCValue{ .memory = decl.link.macho.local_sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b3077d0eb1..fc0c9bb45f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -154,17 +154,13 @@ stub_preamble_sym_index: ?u32 = null, strtab: std.ArrayListUnmanaged(u8) = .{}, strtab_dir: std.HashMapUnmanaged(u32, u32, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, -got_entries: std.ArrayListUnmanaged(GotIndirectionKey) = .{}, -got_entries_map: std.AutoHashMapUnmanaged(GotIndirectionKey, u32) = .{}, - -got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, +got_entries_map: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, *TextBlock) = .{}, stubs: std.ArrayListUnmanaged(u32) = .{}, stubs_map: std.AutoHashMapUnmanaged(u32, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -got_entries_count_dirty: bool = false, load_commands_dirty: bool = false, rebase_info_dirty: bool = false, binding_info_dirty: bool = false, @@ -876,7 +872,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { self.error_flags.no_entry_point_found = false; } - assert(!self.got_entries_count_dirty); assert(!self.load_commands_dirty); assert(!self.rebase_info_dirty); assert(!self.binding_info_dirty); @@ -1731,16 +1726,9 @@ fn allocateTextSegment(self: *MachO) !void { fn allocateDataConstSegment(self: *MachO) !void { const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const nentries = @intCast(u32, self.got_entries.items.len); - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; - - // Set got size - const got = &seg.sections.items[self.got_section_index.?]; - got.size += nentries * @sizeOf(u64); - try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); } @@ -1927,7 +1915,7 @@ fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32 return atom; } -fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { +pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { // TODO converge with `allocateTextBlock` const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; @@ -1936,7 +1924,8 @@ fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { const last_atom_sym = self.locals.items[last.local_sym_index]; break :blk last_atom_sym.n_value + last.size; } else sect.addr; - const atom_alignment = try math.powi(u32, 2, atom.alignment); + // const atom_alignment = try math.powi(u32, 2, atom.alignment); TODO + const atom_alignment = math.powi(u32, 2, atom.alignment) catch unreachable; const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); @@ -1956,7 +1945,7 @@ fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { return vaddr; } -fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { +pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { const seg = self.load_commands.items[match.seg].Segment; const sect = 
seg.sections.items[match.sect]; const sym = self.locals.items[atom.local_sym_index]; @@ -1967,7 +1956,7 @@ fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { try self.writeLocalSymbol(atom.local_sym_index); } -fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { +pub fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { // Update target section's metadata // TODO should we update segment's size here too? // How does it tie with incremental space allocs? @@ -1988,7 +1977,7 @@ fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !v } } -fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*TextBlock { +pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*TextBlock { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ .n_strx = try self.makeString("got_entry"), @@ -2804,15 +2793,12 @@ fn resolveDyldStubBinder(self: *MachO) !void { } // Add dyld_stub_binder as the final GOT entry. - const got_index = @intCast(u32, self.got_entries.items.len); const got_entry = GotIndirectionKey{ .where = .undef, .where_index = self.dyld_stub_binder_index.?, }; - try self.got_entries.append(self.base.allocator, got_entry); - try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); - const atom = try self.createGotAtom(got_entry); + try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, atom); const match = MatchingSection{ .seg = self.data_const_segment_cmd_index.?, .sect = self.got_section_index.?, @@ -2917,7 +2903,6 @@ fn flushZld(self: *MachO) !void { sect.offset = 0; } - try self.writeGotEntries(); try self.setEntryPoint(); try self.writeRebaseInfoTableZld(); try self.writeBindInfoTableZld(); @@ -2952,29 +2937,6 @@ fn flushZld(self: *MachO) !void { } } -fn writeGotEntries(self: *MachO) !void { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[self.got_section_index.?]; - - var buffer = try self.base.allocator.alloc(u8, self.got_entries.items.len * @sizeOf(u64)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - var writer = stream.writer(); - - for (self.got_entries.items) |key| { - const address: u64 = switch (key.where) { - .local => self.locals.items[key.where_index].n_value, - .undef => 0, - }; - try writer.writeIntLittle(u64, address); - } - - log.debug("writing GOT pointers at 0x{x} to 0x{x}", .{ sect.offset, sect.offset + buffer.len }); - - try self.base.file.?.pwriteAll(buffer, sect.offset); -} - fn setEntryPoint(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; @@ -3028,22 +2990,6 @@ fn writeRebaseInfoTableZld(self: *MachO) !void { } } - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .undef) continue; - - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); const size = try bind.rebaseInfoSize(pointers.items); @@ -3068,25 +3014,6 @@ fn writeBindInfoTableZld(self: *MachO) !void { var pointers = 
std.ArrayList(bind.Pointer).init(self.base.allocator); defer pointers.deinit(); - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .local) continue; - - const sym = self.undefs.items[entry.where_index]; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); - } - } - { var it = self.blocks.iterator(); while (it.next()) |entry| { @@ -3319,7 +3246,7 @@ fn writeSymbolTable(self: *MachO) !void { const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; const nstubs = @intCast(u32, self.stubs.items.len); - const ngot_entries = @intCast(u32, self.got_entries.items.len); + const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; @@ -3344,10 +3271,10 @@ fn writeSymbolTable(self: *MachO) !void { } got.reserved1 = nstubs; - for (self.got_entries.items) |entry| { - switch (entry.where) { + for (self.got_entries_map.keys()) |key| { + switch (key.where) { .undef => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); + try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); }, .local => { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); @@ -3373,9 +3300,7 @@ pub fn deinit(self: *MachO) void { } self.section_ordinals.deinit(self.base.allocator); - self.got_entries.deinit(self.base.allocator); self.got_entries_map.deinit(self.base.allocator); - self.got_entries_free_list.deinit(self.base.allocator); self.stubs.deinit(self.base.allocator); self.stubs_map.deinit(self.base.allocator); self.strtab_dir.deinit(self.base.allocator); @@ -3539,8 +3464,6 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { if (decl.link.macho.local_sym_index != 0) return; try self.locals.ensureUnusedCapacity(self.base.allocator, 1); - try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); - try self.decls.putNoClobber(self.base.allocator, decl, {}); if (self.locals_free_list.popOrNull()) |i| { @@ -3552,20 +3475,6 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { _ = self.locals.addOneAssumeCapacity(); } - const got_index: u32 = blk: { - if (self.got_entries_free_list.popOrNull()) |i| { - log.debug("reusing GOT entry index {d} for {s}", .{ i, decl.name }); - break :blk i; - } else { - const got_index = @intCast(u32, self.got_entries.items.len); - log.debug("allocating GOT entry index {d} for {s}", .{ got_index, decl.name }); - _ = self.got_entries.addOneAssumeCapacity(); - self.got_entries_count_dirty = true; - self.rebase_info_dirty = true; - break :blk got_index; - } - }; - self.locals.items[decl.link.macho.local_sym_index] = .{ .n_strx = 0, .n_type = 0, @@ -3573,12 +3482,19 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { .n_desc = 0, .n_value = 0, }; - const got_entry = GotIndirectionKey{ + + // TODO try popping from free list first before allocating a new GOT atom. 
+ const key = GotIndirectionKey{ .where = .local, .where_index = decl.link.macho.local_sym_index, }; - self.got_entries.items[got_index] = got_entry; - try self.got_entries_map.putNoClobber(self.base.allocator, got_entry, got_index); + const got_atom = try self.createGotAtom(key); + _ = try self.allocateAtom(got_atom, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); + try self.got_entries_map.put(self.base.allocator, key, got_atom); + self.rebase_info_dirty = true; } pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { @@ -3760,11 +3676,16 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 if (vaddr != symbol.n_value) { log.debug(" (writing new GOT entry)", .{}); - const got_index = self.got_entries_map.get(.{ + const match = MatchingSection{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }; + const got_atom = self.got_entries_map.get(.{ .where = .local, .where_index = decl.link.macho.local_sym_index, }) orelse unreachable; - try self.writeGotEntry(got_index); + // _ = try self.allocateAtom(got_atom, match); + try self.writeAtom(got_atom, match); } symbol.n_value = vaddr; @@ -3802,11 +3723,16 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 .n_desc = 0, .n_value = addr, }; - const got_index = self.got_entries_map.get(.{ + const match = MatchingSection{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }; + const got_atom = self.got_entries_map.get(.{ .where = .local, .where_index = decl.link.macho.local_sym_index, }) orelse unreachable; - try self.writeGotEntry(got_index); + // _ = try self.allocateAtom(got_atom, match); + try self.writeAtom(got_atom, match); try self.writeLocalSymbol(decl.link.macho.local_sym_index); if (self.d_sym) |*ds| @@ -3955,13 +3881,7 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { if (decl.link.macho.local_sym_index != 0) { self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; - const got_key = GotIndirectionKey{ - .where = .local, - .where_index = decl.link.macho.local_sym_index, - }; - const got_index = self.got_entries_map.get(got_key) orelse unreachable; - _ = self.got_entries_map.remove(got_key); - self.got_entries_free_list.append(self.base.allocator, got_index) catch {}; + // TODO free GOT atom here. self.locals.items[decl.link.macho.local_sym_index].n_type = 0; decl.link.macho.local_sym_index = 0; @@ -4789,29 +4709,6 @@ fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16, sta return st; } -fn writeGotEntry(self: *MachO, index: usize) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[self.got_section_index.?]; - const off = sect.offset + @sizeOf(u64) * index; - - if (self.got_entries_count_dirty) { - // TODO relocate. 
- self.got_entries_count_dirty = false; - } - - const got_entry = self.got_entries.items[index]; - const sym = switch (got_entry.where) { - .local => self.locals.items[got_entry.where_index], - .undef => self.undefs.items[got_entry.where_index], - }; - log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ - off, - sym.n_value, - self.getString(sym.n_strx), - }); - try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off); -} - fn relocateSymbolTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const nlocals = self.locals.items.len; @@ -4901,7 +4798,7 @@ fn writeIndirectSymbolTable(self: *MachO) !void { const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; const nstubs = @intCast(u32, self.stubs.items.len); - const ngot_entries = @intCast(u32, self.got_entries.items.len); + const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff); const nindirectsyms = nstubs * 2 + ngot_entries; const needed_size = @intCast(u32, nindirectsyms * @sizeOf(u32)); @@ -4927,10 +4824,10 @@ fn writeIndirectSymbolTable(self: *MachO) !void { } got.reserved1 = nstubs; - for (self.got_entries.items) |entry| { - switch (entry.where) { + for (self.got_entries_map.keys()) |key| { + switch (key.where) { .undef => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + entry.where_index); + try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); }, .local => { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); @@ -5147,22 +5044,6 @@ fn writeRebaseInfoTable(self: *MachO) !void { } } - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .undef) continue; - - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - }); - } - } - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); const size = try bind.rebaseInfoSize(pointers.items); @@ -5199,25 +5080,6 @@ fn writeBindInfoTable(self: *MachO) !void { var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); defer pointers.deinit(); - if (self.got_section_index) |idx| { - const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?); - - for (self.got_entries.items) |entry, i| { - if (entry.where == .local) continue; - - const sym = self.undefs.items[entry.where_index]; - try pointers.append(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); - } - } - { var it = self.blocks.iterator(); while (it.next()) |entry| { diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 4788487d3b..0cb0c4e2ae 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -1,6 +1,7 @@ const TextBlock = @This(); const std = @import("std"); +const build_options = @import("build_options"); const aarch64 = @import("../../codegen/aarch64.zig"); const assert = std.debug.assert; const 
commands = @import("commands.zig"); @@ -830,9 +831,18 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R }; if (context.macho_file.got_entries_map.contains(key)) break :blk; - const got_index = @intCast(u32, context.macho_file.got_entries.items.len); - try context.macho_file.got_entries.append(context.allocator, key); - try context.macho_file.got_entries_map.putNoClobber(context.allocator, key, got_index); + const atom = try context.macho_file.createGotAtom(key); + try context.macho_file.got_entries_map.putNoClobber(context.macho_file.base.allocator, key, atom); + const match = MachO.MatchingSection{ + .seg = context.macho_file.data_const_segment_cmd_index.?, + .sect = context.macho_file.got_section_index.?, + }; + if (!(build_options.is_stage1 and context.macho_file.base.options.use_stage1)) { + _ = try context.macho_file.allocateAtom(atom, match); + try context.macho_file.writeAtom(atom, match); + } else { + try context.macho_file.allocateAtomStage1(atom, match); + } } else if (parsed_rel.payload == .unsigned) { switch (parsed_rel.where) { .undef => { @@ -1082,9 +1092,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { }; if (is_via_got) { - const dc_seg = macho_file.load_commands.items[macho_file.data_const_segment_cmd_index.?].Segment; - const got = dc_seg.sections.items[macho_file.got_section_index.?]; - const got_index = macho_file.got_entries_map.get(.{ + const atom = macho_file.got_entries_map.get(.{ .where = switch (rel.where) { .local => .local, .undef => .undef, @@ -1099,7 +1107,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { log.err(" this is an internal linker error", .{}); return error.FailedToResolveRelocationTarget; }; - break :blk got.addr + got_index * @sizeOf(u64); + break :blk macho_file.locals.items[atom.local_sym_index].n_value; } switch (rel.where) { From c12183b608c48ec4417c530916fc1a78b7d442f6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 25 Aug 2021 23:00:58 +0200 Subject: [PATCH 27/78] macho: fix text atom allocation --- src/link/MachO.zig | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index fc0c9bb45f..94e0da4003 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4024,11 +4024,17 @@ pub fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 2, else => unreachable, // unhandled architecture type }; - const needed_size: u6 = switch (self.base.options.target.cpu.arch) { + const preamble_size: u6 = switch (self.base.options.target.cpu.arch) { .x86_64 => 15, .aarch64 => 6 * @sizeOf(u32), else => unreachable, }; + const stub_size: u4 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 10, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, + }; + const needed_size = stub_size * self.base.options.symbol_count_hint + preamble_size; const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. 
@@ -4420,7 +4426,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, .seg = self.text_segment_cmd_index.?, .sect = self.text_section_index.?, }; - const text_block_free_list = self.block_free_lists.getPtr(match).?; + var text_block_free_list = self.block_free_lists.get(match).?; const new_block_ideal_capacity = padToIdeal(new_block_size); // We use these to indicate our intention to update metadata, placing the new block, @@ -4489,7 +4495,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const needed_size = (vaddr + new_block_size) - text_section.addr; assert(needed_size <= text_segment.inner.filesize); // TODO must move the entire text section. - _ = try self.blocks.getOrPutValue(self.base.allocator, match, text_block); + _ = try self.blocks.put(self.base.allocator, match, text_block); text_section.size = needed_size; self.load_commands_dirty = true; // TODO Make more granular. From d19d3342c29cc0f5d78cd91b61e5949a86811680 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 26 Aug 2021 11:43:11 +0200 Subject: [PATCH 28/78] macho: save lazy binding info as part of the atom --- src/link/MachO.zig | 149 +++++++++++++++++------------------ src/link/MachO/TextBlock.zig | 60 ++++++++++++-- src/link/MachO/bind.zig | 9 --- 3 files changed, 126 insertions(+), 92 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 94e0da4003..534ead9b42 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -155,9 +155,7 @@ strtab: std.ArrayListUnmanaged(u8) = .{}, strtab_dir: std.HashMapUnmanaged(u32, u32, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, got_entries_map: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, *TextBlock) = .{}, - -stubs: std.ArrayListUnmanaged(u32) = .{}, -stubs_map: std.AutoHashMapUnmanaged(u32, u32) = .{}, +stubs_map: std.AutoArrayHashMapUnmanaged(u32, *TextBlock) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, @@ -783,27 +781,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.parseTextBlocks(); try self.sortSections(); - - for (self.stubs.items) |_| { - const stub_helper_atom = try self.createStubHelperAtom(); - try self.allocateAtomStage1(stub_helper_atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); - - const laptr_atom = try self.createLazyPointerAtom(stub_helper_atom.local_sym_index); - try self.allocateAtomStage1(laptr_atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }); - - const stub_atom = try self.createStubAtom(laptr_atom.local_sym_index); - try self.allocateAtomStage1(stub_atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }); - } - try self.allocateTextSegment(); try self.allocateDataConstSegment(); try self.allocateDataSegment(); @@ -2159,7 +2136,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !*TextBlock { return atom; } -fn createStubHelperAtom(self: *MachO) !*TextBlock { +pub fn createStubHelperAtom(self: *MachO) !*TextBlock { const arch = self.base.options.target.cpu.arch; const stub_size: u4 = switch (arch) { .x86_64 => 10, @@ -2225,7 +2202,7 @@ fn createStubHelperAtom(self: *MachO) !*TextBlock { return atom; } -fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32) !*TextBlock { +pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym_index: u32) !*TextBlock { const local_sym_index = @intCast(u32, self.locals.items.len); try 
self.locals.append(self.base.allocator, .{ .n_strx = try self.makeString("lazy_ptr"), @@ -2248,11 +2225,15 @@ fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32) !*TextBlock { }, }); try atom.rebases.append(self.base.allocator, 0); + try atom.lazy_bindings.append(self.base.allocator, .{ + .local_sym_index = lazy_binding_sym_index, + .offset = 0, + }); self.lazy_binding_info_dirty = true; return atom; } -fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*TextBlock { +pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*TextBlock { const arch = self.base.options.target.cpu.arch; const alignment: u2 = switch (arch) { .x86_64 => 0, @@ -2661,7 +2642,10 @@ fn resolveSymbols(self: *MachO) !void { break :blk atom; }; const laptr_atom = blk: { - const atom = try self.createLazyPointerAtom(stub_helper_atom.local_sym_index); + const atom = try self.createLazyPointerAtom( + stub_helper_atom.local_sym_index, + resolv.where_index, + ); const match = MatchingSection{ .seg = self.data_segment_cmd_index.?, .sect = self.la_symbol_ptr_section_index.?, @@ -2990,8 +2974,6 @@ fn writeRebaseInfoTableZld(self: *MachO) !void { } } - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); - const size = try bind.rebaseInfoSize(pointers.items); var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); defer self.base.allocator.free(buffer); @@ -3067,22 +3049,29 @@ fn writeLazyBindInfoTableZld(self: *MachO) !void { var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); defer pointers.deinit(); - if (self.la_symbol_ptr_section_index) |idx| { + if (self.la_symbol_ptr_section_index) |sect| blk: { + var atom = self.blocks.get(.{ + .seg = self.data_segment_cmd_index.?, + .sect = sect, + }) orelse break :blk; const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - try pointers.ensureUnusedCapacity(self.stubs.items.len); + while (true) { + const sym = self.locals.items[atom.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; - for (self.stubs.items) |import_id, i| { - const sym = self.undefs.items[import_id]; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); + for (atom.lazy_bindings.items) |binding| { + const bind_sym = self.undefs.items[binding.local_sym_index]; + try pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = self.data_segment_cmd_index.?, + .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), + .name = self.getString(bind_sym.n_strx), + }); + } + if (atom.prev) |prev| { + atom = prev; + } else break; } } @@ -3245,7 +3234,7 @@ fn writeSymbolTable(self: *MachO) !void { const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - const nstubs = @intCast(u32, self.stubs.items.len); + const nstubs = @intCast(u32, self.stubs_map.keys().len); const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); @@ -3266,8 +3255,8 @@ fn writeSymbolTable(self: *MachO) !void { var writer = stream.writer(); stubs.reserved1 = 0; - for (self.stubs.items) |id| { - try 
writer.writeIntLittle(u32, dysymtab.iundefsym + id); + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); } got.reserved1 = nstubs; @@ -3283,8 +3272,8 @@ fn writeSymbolTable(self: *MachO) !void { } la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); } try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); @@ -3301,7 +3290,6 @@ pub fn deinit(self: *MachO) void { self.section_ordinals.deinit(self.base.allocator); self.got_entries_map.deinit(self.base.allocator); - self.stubs.deinit(self.base.allocator); self.stubs_map.deinit(self.base.allocator); self.strtab_dir.deinit(self.base.allocator); self.strtab.deinit(self.base.allocator); @@ -4557,10 +4545,6 @@ pub fn addExternFn(self: *MachO, name: []const u8) !u32 { }); try self.unresolved.putNoClobber(self.base.allocator, sym_index, .stub); - const stubs_index = @intCast(u32, self.stubs.items.len); - try self.stubs.append(self.base.allocator, sym_index); - try self.stubs_map.putNoClobber(self.base.allocator, sym_index, stubs_index); - return sym_index; } @@ -4803,7 +4787,7 @@ fn writeIndirectSymbolTable(self: *MachO) !void { const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - const nstubs = @intCast(u32, self.stubs.items.len); + const nstubs = @intCast(u32, self.stubs_map.keys().len); const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff); const nindirectsyms = nstubs * 2 + ngot_entries; @@ -4825,8 +4809,8 @@ fn writeIndirectSymbolTable(self: *MachO) !void { var writer = stream.writer(); stubs.reserved1 = 0; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); } got.reserved1 = nstubs; @@ -4842,8 +4826,8 @@ fn writeIndirectSymbolTable(self: *MachO) !void { } la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs.items) |id| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + id); + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); } try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); @@ -5050,8 +5034,6 @@ fn writeRebaseInfoTable(self: *MachO) !void { } } - std.sort.sort(bind.Pointer, pointers.items, {}, bind.pointerCmp); - const size = try bind.rebaseInfoSize(pointers.items); var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); defer self.base.allocator.free(buffer); @@ -5151,22 +5133,29 @@ fn writeLazyBindInfoTable(self: *MachO) !void { var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); defer pointers.deinit(); - if (self.la_symbol_ptr_section_index) |idx| { + if (self.la_symbol_ptr_section_index) |sect| blk: { + var atom = self.blocks.get(.{ + .seg = self.data_segment_cmd_index.?, + .sect = sect, + }) orelse break :blk; const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = seg.sections.items[idx]; - const base_offset = sect.addr - seg.inner.vmaddr; - const segment_id = @intCast(u16, self.data_segment_cmd_index.?); - try pointers.ensureUnusedCapacity(self.stubs.items.len); + while (true) { + const sym = 
self.locals.items[atom.local_sym_index]; + const base_offset = sym.n_value - seg.inner.vmaddr; - for (self.stubs.items) |import_id, i| { - const sym = self.undefs.items[import_id]; - pointers.appendAssumeCapacity(.{ - .offset = base_offset + i * @sizeOf(u64), - .segment_id = segment_id, - .dylib_ordinal = @divExact(sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(sym.n_strx), - }); + for (atom.lazy_bindings.items) |binding| { + const bind_sym = self.undefs.items[binding.local_sym_index]; + try pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = self.data_segment_cmd_index.?, + .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), + .name = self.getString(bind_sym.n_strx), + }); + } + if (atom.prev) |prev| { + atom = prev; + } else break; } } @@ -5203,6 +5192,15 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { }) orelse return; if (last_atom.local_sym_index == self.stub_preamble_sym_index.?) return; + // Because we insert lazy binding opcodes in reverse order (from the last to the first atom), + // we need to reverse the order of atom traversal here as well. + // TODO figure out a less error-prone mechanism for this! + var atom = last_atom; + while (atom.prev) |prev| { + atom = prev; + } + atom = atom.next.?; + var stream = std.io.fixedBufferStream(buffer); var reader = stream.reader(); var offsets = std.ArrayList(u32).init(self.base.allocator); @@ -5255,7 +5253,6 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { else => unreachable, }; var buf: [@sizeOf(u32)]u8 = undefined; - var atom = last_atom; _ = offsets.pop(); while (offsets.popOrNull()) |bind_offset| { const sym = self.locals.items[atom.local_sym_index]; @@ -5268,8 +5265,8 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { }); try self.base.file.?.pwriteAll(&buf, file_offset); - if (atom.prev) |prev| { - atom = prev; + if (atom.next) |next| { + atom = next; } else break; } } diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 0cb0c4e2ae..8fc1cef634 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -50,6 +50,9 @@ rebases: std.ArrayListUnmanaged(u64) = .{}, /// symbols (aka proxies aka imports) bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, +/// List of lazy bindings +lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, /// List of data-in-code entries. This is currently specific to x86_64 only.
dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, @@ -570,6 +573,7 @@ pub const empty = TextBlock{ pub fn deinit(self: *TextBlock, allocator: *Allocator) void { self.dices.deinit(allocator); + self.lazy_bindings.deinit(allocator); self.bindings.deinit(allocator); self.rebases.deinit(allocator); self.relocs.deinit(allocator); @@ -898,9 +902,53 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R if (parsed_rel.where != .undef) break :blk; if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; - const stubs_index = @intCast(u32, context.macho_file.stubs.items.len); - try context.macho_file.stubs.append(context.allocator, parsed_rel.where_index); - try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stubs_index); + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); + const laptr_atom = try context.macho_file.createLazyPointerAtom( + stub_helper_atom.local_sym_index, + parsed_rel.where_index, + ); + const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index); + try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom); + + if (build_options.is_stage1 and context.macho_file.base.options.use_stage1) { + try context.macho_file.allocateAtomStage1(stub_helper_atom, .{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + }); + try context.macho_file.allocateAtomStage1(laptr_atom, .{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + }); + try context.macho_file.allocateAtomStage1(stub_atom, .{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + }); + } else { + { + const match = MachO.MatchingSection{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + }; + _ = try context.macho_file.allocateAtom(stub_helper_atom, match); + try context.macho_file.writeAtom(stub_helper_atom, match); + } + { + const match = MachO.MatchingSection{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + }; + _ = try context.macho_file.allocateAtom(laptr_atom, match); + try context.macho_file.writeAtom(laptr_atom, match); + } + { + const match = MachO.MatchingSection{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + }; + _ = try context.macho_file.allocateAtom(stub_atom, match); + try context.macho_file.writeAtom(stub_atom, match); + } + } } } } @@ -1145,13 +1193,11 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { break :blk sym.n_value; }, .undef => { - const stubs_index = macho_file.stubs_map.get(rel.where_index) orelse { + const atom = macho_file.stubs_map.get(rel.where_index) orelse { // TODO verify in TextBlock that the symbol is indeed dynamically bound. break :blk 0; // Dynamically bound by dyld. 
}; - const segment = macho_file.load_commands.items[macho_file.text_segment_cmd_index.?].Segment; - const stubs = segment.sections.items[macho_file.stubs_section_index.?]; - break :blk stubs.addr + stubs_index * stubs.reserved2; + break :blk macho_file.locals.items[atom.local_sym_index].n_value; }, } }; diff --git a/src/link/MachO/bind.zig b/src/link/MachO/bind.zig index 402e74d776..14a5ba3e30 100644 --- a/src/link/MachO/bind.zig +++ b/src/link/MachO/bind.zig @@ -9,15 +9,6 @@ pub const Pointer = struct { name: ?[]const u8 = null, }; -pub fn pointerCmp(context: void, a: Pointer, b: Pointer) bool { - _ = context; - if (a.segment_id < b.segment_id) return true; - if (a.segment_id == b.segment_id) { - return a.offset < b.offset; - } - return false; -} - pub fn rebaseInfoSize(pointers: []const Pointer) !u64 { var stream = std.io.countingWriter(std.io.null_writer); var writer = stream.writer(); From 432fb7054d3214f992af5a2ef652b72d531d660b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 26 Aug 2021 13:45:28 +0200 Subject: [PATCH 29/78] macho: fix stub writing in self-hosted setting --- src/link/MachO.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 534ead9b42..cb030f8d12 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2626,11 +2626,12 @@ fn resolveSymbols(self: *MachO) !void { undef.n_type |= macho.N_EXT; undef.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; - if (self.unresolved.fetchSwapRemove(resolv.where_index)) |entry| { + if (self.unresolved.fetchSwapRemove(resolv.where_index)) |entry| outer_blk: { switch (entry.value) { .none => {}, .got => return error.TODOGotHint, .stub => { + if (self.stubs_map.contains(resolv.where_index)) break :outer_blk; const stub_helper_atom = blk: { const atom = try self.createStubHelperAtom(); const match = MatchingSection{ @@ -2654,7 +2655,7 @@ fn resolveSymbols(self: *MachO) !void { try self.writeAtom(atom, match); break :blk atom; }; - { + const stub_atom = blk: { const atom = try self.createStubAtom(laptr_atom.local_sym_index); const match = MatchingSection{ .seg = self.text_segment_cmd_index.?, @@ -2662,7 +2663,9 @@ fn resolveSymbols(self: *MachO) !void { }; _ = try self.allocateAtom(atom, match); try self.writeAtom(atom, match); - } + break :blk atom; + }; + try self.stubs_map.putNoClobber(self.base.allocator, resolv.where_index, stub_atom); }, } } From 570c75cb7440935990338ee733cce4a0b966c57b Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 26 Aug 2021 14:23:28 +0200 Subject: [PATCH 30/78] macho: write all atoms in flush so that we can resolve relocs --- src/link/MachO.zig | 107 +++++++++++++++-------------------- src/link/MachO/TextBlock.zig | 39 +++++-------- 2 files changed, 59 insertions(+), 87 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index cb030f8d12..1dd369c981 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -788,6 +788,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.allocateTextBlocks(); try self.flushZld(); } else { + try self.writeAtoms(); try self.flushModule(comp); } } @@ -1901,8 +1902,7 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const last_atom_sym = self.locals.items[last.local_sym_index]; break :blk last_atom_sym.n_value + last.size; } else sect.addr; - // const atom_alignment = try math.powi(u32, 2, atom.alignment); TODO - const atom_alignment = math.powi(u32, 2, atom.alignment) catch unreachable; + const atom_alignment = try 
math.powi(u32, 2, atom.alignment); const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); @@ -1954,6 +1954,20 @@ pub fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection } } +fn writeAtoms(self: *MachO) !void { + var it = self.blocks.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var atom: *TextBlock = entry.value_ptr.*; + + while (atom.prev) |prev| { + try self.writeAtom(atom, match); + atom = prev; + } + try self.writeAtom(atom, match); + } +} + pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*TextBlock { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ @@ -2588,21 +2602,17 @@ fn resolveSymbols(self: *MachO) !void { if (!use_stage1) { { const atom = try self.createDyldPrivateAtom(); - const match = MatchingSection{ + _ = try self.allocateAtom(atom, .{ .seg = self.data_segment_cmd_index.?, .sect = self.data_section_index.?, - }; - _ = try self.allocateAtom(atom, match); - try self.writeAtom(atom, match); + }); } { const atom = try self.createStubHelperPreambleAtom(); - const match = MatchingSection{ + _ = try self.allocateAtom(atom, .{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, - }; - _ = try self.allocateAtom(atom, match); - try self.writeAtom(atom, match); + }); } } @@ -2634,12 +2644,10 @@ fn resolveSymbols(self: *MachO) !void { if (self.stubs_map.contains(resolv.where_index)) break :outer_blk; const stub_helper_atom = blk: { const atom = try self.createStubHelperAtom(); - const match = MatchingSection{ + _ = try self.allocateAtom(atom, .{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, - }; - _ = try self.allocateAtom(atom, match); - try self.writeAtom(atom, match); + }); break :blk atom; }; const laptr_atom = blk: { @@ -2647,22 +2655,18 @@ fn resolveSymbols(self: *MachO) !void { stub_helper_atom.local_sym_index, resolv.where_index, ); - const match = MatchingSection{ + _ = try self.allocateAtom(atom, .{ .seg = self.data_segment_cmd_index.?, .sect = self.la_symbol_ptr_section_index.?, - }; - _ = try self.allocateAtom(atom, match); - try self.writeAtom(atom, match); + }); break :blk atom; }; const stub_atom = blk: { const atom = try self.createStubAtom(laptr_atom.local_sym_index); - const match = MatchingSection{ + _ = try self.allocateAtom(atom, .{ .seg = self.text_segment_cmd_index.?, .sect = self.stubs_section_index.?, - }; - _ = try self.allocateAtom(atom, match); - try self.writeAtom(atom, match); + }); break :blk atom; }; try self.stubs_map.putNoClobber(self.base.allocator, resolv.where_index, stub_atom); @@ -2793,7 +2797,6 @@ fn resolveDyldStubBinder(self: *MachO) !void { // TODO remove once we can incrementally update in stage1 too. 
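+    // The atom is only allocated here; its contents are now written in a single pass by writeAtoms() during flush.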
if (!(build_options.is_stage1 and self.base.options.use_stage1)) { _ = try self.allocateAtom(atom, match); - try self.writeAtom(atom, match); } else { try self.allocateAtomStage1(atom, match); } @@ -3480,10 +3483,6 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { .where_index = decl.link.macho.local_sym_index, }; const got_atom = try self.createGotAtom(key); - _ = try self.allocateAtom(got_atom, .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }); try self.got_entries_map.put(self.base.allocator, key, got_atom); self.rebase_info_dirty = true; } @@ -3549,9 +3548,7 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv }, } - const symbol = try self.placeDecl(decl, decl.link.macho.code.items.len); - - try self.writeCode(symbol, decl.link.macho.code.items); + _ = try self.placeDecl(decl, decl.link.macho.code.items.len); if (debug_buffers) |db| { try self.d_sym.?.commitDeclDebugInfo( @@ -3642,9 +3639,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { }, } }; - const symbol = try self.placeDecl(decl, code.len); - - try self.writeCode(symbol, code); + _ = try self.placeDecl(decl, code.len); // Since we updated the vaddr and the size, each corresponding export symbol also // needs to be updated. @@ -3667,16 +3662,14 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 if (vaddr != symbol.n_value) { log.debug(" (writing new GOT entry)", .{}); - const match = MatchingSection{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }; const got_atom = self.got_entries_map.get(.{ .where = .local, .where_index = decl.link.macho.local_sym_index, }) orelse unreachable; - // _ = try self.allocateAtom(got_atom, match); - try self.writeAtom(got_atom, match); + _ = try self.allocateAtom(got_atom, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); } symbol.n_value = vaddr; @@ -3714,40 +3707,29 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 .n_desc = 0, .n_value = addr, }; - const match = MatchingSection{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }; const got_atom = self.got_entries_map.get(.{ .where = .local, .where_index = decl.link.macho.local_sym_index, }) orelse unreachable; - // _ = try self.allocateAtom(got_atom, match); - try self.writeAtom(got_atom, match); + _ = try self.allocateAtom(got_atom, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); try self.writeLocalSymbol(decl.link.macho.local_sym_index); if (self.d_sym) |*ds| try ds.writeLocalSymbol(decl.link.macho.local_sym_index); } - // Resolve relocations - try decl.link.macho.resolveRelocs(self); - // TODO this requires further investigation: should we dispose of resolved relocs, or keep them - // so that we can reapply them when moving/growing sections? - decl.link.macho.relocs.clearAndFree(self.base.allocator); + // // Resolve relocations + // try decl.link.macho.resolveRelocs(self); + // // TODO this requires further investigation: should we dispose of resolved relocs, or keep them + // // so that we can reapply them when moving/growing sections? 
+ // decl.link.macho.relocs.clearAndFree(self.base.allocator); return symbol; } -fn writeCode(self: *MachO, symbol: *macho.nlist_64, code: []const u8) !void { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; - const section_offset = symbol.n_value - text_section.addr; - const file_offset = text_section.offset + section_offset; - log.debug("writing code for symbol {s} at file offset 0x{x}", .{ self.getString(symbol.n_strx), file_offset }); - try self.base.file.?.pwriteAll(code, file_offset); -} - pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { if (self.d_sym) |*ds| { try ds.updateDeclLineNumber(module, decl); @@ -3983,7 +3965,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const needed_size = stub_size * self.base.options.symbol_count_hint; const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. @@ -4739,8 +4721,9 @@ fn writeLocalSymbol(self: *MachO, index: usize) !void { try self.relocateSymbolTable(); const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; - log.debug("writing local symbol {} at 0x{x}", .{ index, off }); - try self.base.file.?.pwriteAll(mem.asBytes(&self.locals.items[index]), off); + const sym = self.locals.items[index]; + log.debug("writing local symbol {s}: {} at 0x{x}", .{ self.getString(sym.n_strx), sym, off }); + try self.base.file.?.pwriteAll(mem.asBytes(&sym), off); } fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 8fc1cef634..e2552a8b4f 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -75,6 +75,8 @@ dbg_info_off: u32, /// Size of the .debug_info tag for this Decl, not including padding. 
dbg_info_len: u32, +dirty: bool = true, + pub const SymbolAtOffset = struct { local_sym_index: u32, offset: u64, @@ -843,7 +845,6 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R }; if (!(build_options.is_stage1 and context.macho_file.base.options.use_stage1)) { _ = try context.macho_file.allocateAtom(atom, match); - try context.macho_file.writeAtom(atom, match); } else { try context.macho_file.allocateAtomStage1(atom, match); } @@ -924,30 +925,18 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R .sect = context.macho_file.stubs_section_index.?, }); } else { - { - const match = MachO.MatchingSection{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stub_helper_section_index.?, - }; - _ = try context.macho_file.allocateAtom(stub_helper_atom, match); - try context.macho_file.writeAtom(stub_helper_atom, match); - } - { - const match = MachO.MatchingSection{ - .seg = context.macho_file.data_segment_cmd_index.?, - .sect = context.macho_file.la_symbol_ptr_section_index.?, - }; - _ = try context.macho_file.allocateAtom(laptr_atom, match); - try context.macho_file.writeAtom(laptr_atom, match); - } - { - const match = MachO.MatchingSection{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stubs_section_index.?, - }; - _ = try context.macho_file.allocateAtom(stub_atom, match); - try context.macho_file.writeAtom(stub_atom, match); - } + _ = try context.macho_file.allocateAtom(stub_helper_atom, .{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + }); + _ = try context.macho_file.allocateAtom(laptr_atom, .{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + }); + _ = try context.macho_file.allocateAtom(stub_atom, .{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + }); } } } From 8bd0687713855a41578605a3e1a6c9686a702bfe Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 26 Aug 2021 16:16:34 +0200 Subject: [PATCH 31/78] macho: port mechanism for allocating symbols within atoms such as entire sections represented as a single atom. --- src/link/MachO.zig | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 1dd369c981..390d4c243e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1908,8 +1908,23 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 // TODO we should check if we need to expand the section or not like we // do in `allocateTextBlock`. + const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); sym.n_value = vaddr; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1);
+    sym.n_sect = n_sect;
+
+    // Update each alias (if any)
+    for (atom.aliases.items) |index| {
+        const alias_sym = &self.locals.items[index];
+        alias_sym.n_value = vaddr;
+        alias_sym.n_sect = n_sect;
+    }
+
+    // Update each symbol contained within the TextBlock
+    for (atom.contained.items) |sym_at_off| {
+        const contained_sym = &self.locals.items[sym_at_off.local_sym_index];
+        contained_sym.n_value = vaddr + sym_at_off.offset;
+        contained_sym.n_sect = n_sect;
+    }
 
     if (self.blocks.getPtr(match)) |last| {
         last.*.next = atom;
From 705cd64080a5555425de7d13a9fbc6c19af65bde Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Fri, 27 Aug 2021 10:45:56 +0200
Subject: [PATCH 32/78] macho: proactively add zerofill sections in correct
 order

The required order on macOS nowadays is:
* __data
* __thread_vars
* __thread_data
* __thread_bss
* __bss
---
 src/link/MachO.zig | 106 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 1 deletion(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 390d4c243e..73da945522 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -780,7 +780,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
     }
 
     try self.parseTextBlocks();
-    try self.sortSections();
+    // try self.sortSections();
     try self.allocateTextSegment();
     try self.allocateDataConstSegment();
     try self.allocateDataSegment();
@@ -4163,6 +4163,110 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         self.load_commands_dirty = true;
     }
 
+    if (self.tlv_section_index == null) {
+        const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        self.tlv_section_index = @intCast(u16, data_segment.sections.items.len);
+
+        const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
+        const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null);
+        assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment.
+
+        log.debug("found __thread_vars section free space 0x{x} to 0x{x}", .{ off, off + needed_size });
+
+        try data_segment.addSection(self.base.allocator, "__thread_vars", .{
+            .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff,
+            .size = needed_size,
+            .offset = @intCast(u32, off),
+            .@"align" = 3, // 2^3 = @sizeOf(u64)
+            .flags = macho.S_THREAD_LOCAL_VARIABLES,
+        });
+        const match = MatchingSection{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.tlv_section_index.?,
+        };
+        _ = try self.section_ordinals.getOrPut(self.base.allocator, match);
+        try self.block_free_lists.putNoClobber(self.base.allocator, match, .{});
+        self.load_commands_dirty = true;
+    }
+
+    if (self.tlv_data_section_index == null) {
+        const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        self.tlv_data_section_index = @intCast(u16, data_segment.sections.items.len);
+
+        const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
+        const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null);
+        assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. 
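+        // __thread_data (initialized TLS data) must follow __thread_vars and precede __thread_bss, per the ordering above.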
+ + log.debug("found __thread_data section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try data_segment.addSection(self.base.allocator, "__thread_data", .{ + .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, + .size = needed_size, + .offset = @intCast(u32, off), + .@"align" = 3, // 2^3 = @sizeOf(u64) + .flags = macho.S_THREAD_LOCAL_REGULAR, + }); + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_data_section_index.?, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); + self.load_commands_dirty = true; + } + + if (self.tlv_bss_section_index == null) { + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.tlv_bss_section_index = @intCast(u16, data_segment.sections.items.len); + + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); + assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. + + log.debug("found __thread_bss section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try data_segment.addSection(self.base.allocator, "__thread_bss", .{ + .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, + .size = needed_size, + .offset = @intCast(u32, off), + .@"align" = 3, // 2^3 = @sizeOf(u64) + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }); + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.tlv_bss_section_index.?, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); + self.load_commands_dirty = true; + } + + if (self.bss_section_index == null) { + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.bss_section_index = @intCast(u16, data_segment.sections.items.len); + + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); + assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. 
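+        // Per the ordering requirement above, __bss comes last among the zerofill sections.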
+ + log.debug("found __bss section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try data_segment.addSection(self.base.allocator, "__bss", .{ + .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, + .size = 0, + .offset = @intCast(u32, off), + .@"align" = 3, // 2^3 = @sizeOf(u64) + .flags = macho.S_ZEROFILL, + }); + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); + self.load_commands_dirty = true; + } + if (self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); const address_and_offset = self.nextSegmentAddressAndOffset(); From 1e65d41a659894eed6beb92ae89ca531f6f9dfd0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 27 Aug 2021 11:30:02 +0200 Subject: [PATCH 33/78] macho: merge __common with __bss section --- src/link/MachO.zig | 70 ++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 46 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 73da945522..473bd077d8 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -127,7 +127,6 @@ tlv_bss_section_index: ?u16 = null, la_symbol_ptr_section_index: ?u16 = null, data_section_index: ?u16 = null, bss_section_index: ?u16 = null, -common_section_index: ?u16 = null, objc_const_section_index: ?u16 = null, objc_selrefs_section_index: ?u16 = null, @@ -1222,31 +1221,17 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; }, macho.S_ZEROFILL => { - if (mem.eql(u8, sectname, "__common")) { - if (self.common_section_index == null) { - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__common", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, - }; - } else { - if (self.bss_section_index == null) { - self.bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + if (self.bss_section_index == null) { + self.bss_section_index = @intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__bss", .{ + .flags = macho.S_ZEROFILL, + }); } + + break :blk .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; }, macho.S_THREAD_LOCAL_VARIABLES => { if (self.tlv_section_index == null) { @@ -1598,7 +1583,6 @@ fn sortSections(self: *MachO) !void { &self.tlv_data_section_index, &self.tlv_bss_section_index, &self.bss_section_index, - &self.common_section_index, }; for (indices) |maybe_index| { const new_index: u16 = if (maybe_index.*) |index| blk: { @@ -2578,16 +2562,16 @@ fn resolveSymbols(self: *MachO) !void { while (tentatives.popOrNull()) |entry| { const sym = &self.globals.items[entry.key]; const match: MatchingSection = blk: { - if (self.common_section_index == null) { + if (self.bss_section_index == null) { const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.common_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__common", .{ + self.bss_section_index = 
@intCast(u16, data_seg.sections.items.len); + try data_seg.addSection(self.base.allocator, "__bss", .{ .flags = macho.S_ZEROFILL, }); } break :blk .{ .seg = self.data_segment_cmd_index.?, - .sect = self.common_section_index.?, + .sect = self.bss_section_index.?, }; }; _ = try self.section_ordinals.getOrPut(self.base.allocator, match); @@ -2890,23 +2874,17 @@ fn addLoadDylibLCs(self: *MachO) !void { fn flushZld(self: *MachO) !void { try self.writeTextBlocks(); - if (self.common_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } + // if (self.bss_section_index) |index| { + // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + // const sect = &seg.sections.items[index]; + // sect.offset = 0; + // } - if (self.bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } - - if (self.tlv_bss_section_index) |index| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[index]; - sect.offset = 0; - } + // if (self.tlv_bss_section_index) |index| { + // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + // const sect = &seg.sections.items[index]; + // sect.offset = 0; + // } try self.setEntryPoint(); try self.writeRebaseInfoTableZld(); From ad4a8e76654cbb6cab8b2de49a3b83c941bb26c7 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 27 Aug 2021 13:47:43 +0200 Subject: [PATCH 34/78] macho: keep actual file offset for zerofill sections separately This way, we will conform to the standard practice of setting the offset within the section header to the beginning of the file and we will be able to track the location of the section in the file for incremental updates. --- src/link/MachO.zig | 74 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 473bd077d8..9e170ca1f4 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -133,6 +133,9 @@ objc_selrefs_section_index: ?u16 = null, objc_classrefs_section_index: ?u16 = null, objc_data_section_index: ?u16 = null, +bss_file_offset: ?u64 = null, +tlv_bss_file_offset: ?u64 = null, + locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, @@ -1714,11 +1717,30 @@ fn allocateSegment(self: *MachO, index: u16, offset: u64) !void { // Allocate the sections according to their alignment at the beginning of the segment. var start: u64 = offset; - for (seg.sections.items) |*sect| { + for (seg.sections.items) |*sect, sect_id| { const alignment = try math.powi(u32, 2, sect.@"align"); const start_aligned = mem.alignForwardGeneric(u64, start, alignment); const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); - sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); + const file_offset = @intCast(u32, seg.inner.fileoff + start_aligned); + + blk: { + if (index == self.data_segment_cmd_index.?) 
{ + if (self.bss_section_index) |idx| { + if (sect_id == idx) { + self.bss_file_offset = file_offset; + break :blk; + } + } + if (self.tlv_bss_section_index) |idx| { + if (sect_id == idx) { + self.tlv_bss_file_offset = file_offset; + break :blk; + } + } + } + sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); + } + sect.addr = seg.inner.vmaddr + start_aligned; start = end_aligned; } @@ -1821,6 +1843,18 @@ fn writeTextBlocks(self: *MachO) !void { var code = try self.base.allocator.alloc(u8, sect.size); defer self.base.allocator.free(code); + const file_offset: u64 = blk: { + if (self.data_segment_cmd_index.? == match.seg) { + if (self.bss_section_index) |idx| { + if (idx == match.sect) break :blk self.bss_file_offset.?; + } + if (self.tlv_bss_section_index) |idx| { + if (idx == match.sect) break :blk self.tlv_bss_file_offset.?; + } + } + break :blk sect.offset; + }; + if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { mem.set(u8, code, 0); } else { @@ -1856,7 +1890,7 @@ fn writeTextBlocks(self: *MachO) !void { mem.set(u8, code[base_off..], 0); } - try self.base.file.?.pwriteAll(code, sect.offset); + try self.base.file.?.pwriteAll(code, file_offset); } } @@ -1925,7 +1959,18 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; const sym = self.locals.items[atom.local_sym_index]; - const file_offset = sect.offset + sym.n_value - sect.addr; + const sect_offset: u64 = blk: { + if (self.data_segment_cmd_index.? == match.seg) { + if (self.bss_section_index) |idx| { + if (idx == match.sect) break :blk self.bss_file_offset.?; + } + if (self.tlv_bss_section_index) |idx| { + if (idx == match.sect) break :blk self.tlv_bss_file_offset.?; + } + } + break :blk sect.offset; + }; + const file_offset = sect_offset + sym.n_value - sect.addr; try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); @@ -2873,19 +2918,6 @@ fn addLoadDylibLCs(self: *MachO) !void { fn flushZld(self: *MachO) !void { try self.writeTextBlocks(); - - // if (self.bss_section_index) |index| { - // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - // const sect = &seg.sections.items[index]; - // sect.offset = 0; - // } - - // if (self.tlv_bss_section_index) |index| { - // const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - // const sect = &seg.sections.items[index]; - // sect.offset = 0; - // } - try self.setEntryPoint(); try self.writeRebaseInfoTableZld(); try self.writeBindInfoTableZld(); @@ -4203,10 +4235,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { log.debug("found __thread_bss section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + // We keep offset to the section in a separate variable as the actual section is usually pointing at the + // beginning of the file. 
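+        // (Zerofill sections occupy no space in the file, so by convention the section header's offset field stays 0.)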
+ self.tlv_bss_file_offset = off; try data_segment.addSection(self.base.allocator, "__thread_bss", .{ .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, .size = needed_size, - .offset = @intCast(u32, off), .@"align" = 3, // 2^3 = @sizeOf(u64) .flags = macho.S_THREAD_LOCAL_ZEROFILL, }); @@ -4229,10 +4263,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { log.debug("found __bss section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + // We keep offset to the section in a separate variable as the actual section is usually pointing at the + // beginning of the file. + self.bss_file_offset = off; try data_segment.addSection(self.base.allocator, "__bss", .{ .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, .size = 0, - .offset = @intCast(u32, off), .@"align" = 3, // 2^3 = @sizeOf(u64) .flags = macho.S_ZEROFILL, }); From a14e98fcaceed92be8b1859d7ae331176d4e4d84 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 27 Aug 2021 20:32:11 +0200 Subject: [PATCH 35/78] macho: remove sorting sections and refactor atom parsing in objects --- src/link/MachO.zig | 163 ++------------------------------------ src/link/MachO/Object.zig | 99 +++++------------------ 2 files changed, 29 insertions(+), 233 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 9e170ca1f4..effe6d641b 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -782,7 +782,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { } try self.parseTextBlocks(); - // try self.sortSections(); try self.allocateTextSegment(); try self.allocateDataConstSegment(); try self.allocateDataSegment(); @@ -1500,158 +1499,6 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio return res; } -fn sortSections(self: *MachO) !void { - var text_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer text_index_mapping.deinit(); - var data_const_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer data_const_index_mapping.deinit(); - var data_index_mapping = std.AutoHashMap(u16, u16).init(self.base.allocator); - defer data_index_mapping.deinit(); - - { - // __TEXT segment - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); - - const indices = &[_]*?u16{ - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methlist_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try text_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - // __DATA_CONST segment - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); - - const indices = &[_]*?u16{ - 
&self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_const_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - // __DATA segment - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureCapacity(self.base.allocator, sections.len); - - // __DATA segment - const indices = &[_]*?u16{ - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - &self.bss_section_index, - }; - for (indices) |maybe_index| { - const new_index: u16 = if (maybe_index.*) |index| blk: { - const idx = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sections[index]); - try data_index_mapping.putNoClobber(index, idx); - break :blk idx; - } else continue; - maybe_index.* = new_index; - } - } - - { - var transient: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}; - try transient.ensureCapacity(self.base.allocator, self.blocks.count()); - - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const old = entry.key_ptr.*; - const sect = if (old.seg == self.text_segment_cmd_index.?) - text_index_mapping.get(old.sect).? - else if (old.seg == self.data_const_segment_cmd_index.?) - data_const_index_mapping.get(old.sect).? - else - data_index_mapping.get(old.sect).?; - transient.putAssumeCapacityNoClobber(.{ - .seg = old.seg, - .sect = sect, - }, entry.value_ptr.*); - } - - self.blocks.clearAndFree(self.base.allocator); - self.blocks.deinit(self.base.allocator); - self.blocks = transient; - } - - { - // Create new section ordinals. 
- self.section_ordinals.clearRetainingCapacity();
-        const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-        for (text_seg.sections.items) |_, sect_id| {
-            const res = self.section_ordinals.getOrPutAssumeCapacity(.{
-                .seg = self.text_segment_cmd_index.?,
-                .sect = @intCast(u16, sect_id),
-            });
-            assert(!res.found_existing);
-        }
-        const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
-        for (data_const_seg.sections.items) |_, sect_id| {
-            const res = self.section_ordinals.getOrPutAssumeCapacity(.{
-                .seg = self.data_const_segment_cmd_index.?,
-                .sect = @intCast(u16, sect_id),
-            });
-            assert(!res.found_existing);
-        }
-        const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-        for (data_seg.sections.items) |_, sect_id| {
-            const res = self.section_ordinals.getOrPutAssumeCapacity(.{
-                .seg = self.data_segment_cmd_index.?,
-                .sect = @intCast(u16, sect_id),
-            });
-            assert(!res.found_existing);
-        }
-    }
-}
-
 fn allocateTextSegment(self: *MachO) !void {
     const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize;
@@ -1894,7 +1741,7 @@ fn writeTextBlocks(self: *MachO) !void {
     }
 }
 
-fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*TextBlock {
+pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*TextBlock {
     const code = try self.base.allocator.alloc(u8, size);
     defer self.base.allocator.free(code);
     mem.set(u8, code, 0);
@@ -1924,8 +1771,12 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64
     const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment);
     log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr });
 
-    // TODO we should check if we need to expand the section or not like we
-    // do in `allocateTextBlock`.
+    const expand_section = true;
+    if (expand_section) {
+        // Expand the section, possibly shifting all the atoms for the sections following it.
+        // We might also need to shift entire segments if there is not enough
+        // padding left.
+    }
 
     const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); sym.n_value = vaddr; sym.n_sect = n_sect; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 06c76b259d..d90e3837b5 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1,6 +1,7 @@ const Object = @This(); const std = @import("std"); +const build_options = @import("build_options"); const assert = std.debug.assert; const dwarf = std.dwarf; const fs = std.fs; @@ -405,15 +406,9 @@ const TextBlockParser = struct { break :blk .static; } else null; - const block = try context.allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = senior_nlist.index; + const block = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align); block.stab = stab; - block.size = size; - block.alignment = actual_align; - try context.macho_file.managed_blocks.append(context.allocator, block); - - try block.code.appendSlice(context.allocator, code); + mem.copy(u8, block.code.items, code); try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); for (aliases.items) |alias| { @@ -458,6 +453,7 @@ pub fn parseTextBlocks( object_id: u16, macho_file: *MachO, ) !void { + const use_stage1 = build_options.is_stage1 and macho_file.base.options.use_stage1; const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.debug("analysing {s}", .{self.name}); @@ -508,6 +504,7 @@ pub fn parseTextBlocks( log.debug("unhandled section", .{}); continue; }; + // TODO allocate section here. // Read section's code var code = try allocator.alloc(u8, @intCast(usize, sect.size)); @@ -568,18 +565,17 @@ pub fn parseTextBlocks( try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index); break :blk block_local_sym_index; }; - const block_code = code[0 .. first_nlist.n_value - sect.addr]; const block_size = block_code.len; + const block = try macho_file.createEmptyAtom(block_local_sym_index, block_size, sect.@"align"); - const block = try allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = block_local_sym_index; - block.size = block_size; - block.alignment = sect.@"align"; - try macho_file.managed_blocks.append(allocator, block); + if (use_stage1) { + try macho_file.allocateAtomStage1(block, match); + } else { + _ = try macho_file.allocateAtom(block, match); + } - try block.code.appendSlice(allocator, block_code); + mem.copy(u8, block.code.items, block_code); try block.parseRelocs(relocs, .{ .base_addr = 0, @@ -601,25 +597,6 @@ pub fn parseTextBlocks( } } - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? - const tseg = &macho_file.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (macho_file.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try macho_file.blocks.putNoClobber(allocator, match, block); - } - try self.text_blocks.append(allocator, block); } @@ -666,23 +643,10 @@ pub fn parseTextBlocks( } } - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &macho_file.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (macho_file.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; + if (use_stage1) { + try macho_file.allocateAtomStage1(block, match); } else { - try macho_file.blocks.putNoClobber(allocator, match, block); + _ = try macho_file.allocateAtom(block, match); } try self.text_blocks.append(allocator, block); @@ -713,15 +677,15 @@ pub fn parseTextBlocks( try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index); break :blk block_local_sym_index; }; + const block = try macho_file.createEmptyAtom(block_local_sym_index, sect.size, sect.@"align"); - const block = try allocator.create(TextBlock); - block.* = TextBlock.empty; - block.local_sym_index = block_local_sym_index; - block.size = sect.size; - block.alignment = sect.@"align"; - try macho_file.managed_blocks.append(allocator, block); + if (use_stage1) { + try macho_file.allocateAtomStage1(block, match); + } else { + _ = try macho_file.allocateAtom(block, match); + } - try block.code.appendSlice(allocator, code); + mem.copy(u8, block.code.items, code); try block.parseRelocs(relocs, .{ .base_addr = 0, @@ -779,25 +743,6 @@ pub fn parseTextBlocks( }); } - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? - const tseg = &macho_file.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", block.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + block.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (macho_file.blocks.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; - } else { - try macho_file.blocks.putNoClobber(allocator, match, block); - } - try self.text_blocks.append(allocator, block); } } From 2831d6e9b8b29c21bc7417c5e370674e3130f6ae Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 30 Aug 2021 15:43:20 +0200 Subject: [PATCH 36/78] macho: add first pass at allocating parsed atoms in objects This commit makes it possible to combine self-hosted with a pre-compiled C object file, e.g.: ``` zig-out/bin/zig build-exe hello.zig add.o ``` where `add.o` is a pre-compiled C object file. 
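
For illustration, a minimal `hello.zig` for the invocation above might look like the
following sketch (hypothetical contents; `add.o` is assumed to export an `add` symbol,
e.g. produced by `cc -c add.c`):

```
// hello.zig -- calls into the pre-compiled C object file.
const std = @import("std");

// Resolved at link time against the definition in add.o.
extern fn add(a: c_int, b: c_int) c_int;

pub fn main() void {
    std.debug.print("1 + 2 = {d}\n", .{add(1, 2)});
}
```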
--- src/link/MachO.zig | 171 ++++++++++++++++++++++++++++------- src/link/MachO/Object.zig | 24 ++--- src/link/MachO/TextBlock.zig | 13 +++ src/link/MachO/commands.zig | 2 +- 4 files changed, 164 insertions(+), 46 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index effe6d641b..1def32c41a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -789,6 +789,31 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.allocateTextBlocks(); try self.flushZld(); } else { + try self.parseTextBlocks(); + try self.allocateGlobalSymbols(); + { + log.debug("locals:", .{}); + for (self.locals.items) |sym| { + log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + log.debug("globals:", .{}); + for (self.globals.items) |sym| { + log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + log.debug("undefs:", .{}); + for (self.undefs.items) |sym| { + log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + log.debug("unresolved:", .{}); + for (self.unresolved.keys()) |key| { + log.debug(" {d} => {s}", .{ key, self.unresolved.get(key).? }); + } + log.debug("resolved:", .{}); + var it = self.symbol_resolver.iterator(); + while (it.next()) |entry| { + log.debug(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); + } + } try self.writeAtoms(); try self.flushModule(comp); } @@ -1114,12 +1139,14 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio const segname = commands.segmentName(sect); const sectname = commands.sectionName(sect); + var needs_allocation = false; const res: ?MatchingSection = blk: { switch (commands.sectionType(sect)) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); try text_seg.addSection(self.base.allocator, "__const", .{}); + needs_allocation = true; } break :blk .{ @@ -1136,6 +1163,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try text_seg.addSection(self.base.allocator, "__objc_methname", .{ .flags = macho.S_CSTRING_LITERALS, }); + needs_allocation = true; } break :blk .{ @@ -1148,6 +1176,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try text_seg.addSection(self.base.allocator, "__objc_methtype", .{ .flags = macho.S_CSTRING_LITERALS, }); + needs_allocation = true; } break :blk .{ @@ -1158,6 +1187,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.objc_classname_section_index == null) { self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); try text_seg.addSection(self.base.allocator, "__objc_classname", .{}); + needs_allocation = true; } break :blk .{ @@ -1171,6 +1201,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try text_seg.addSection(self.base.allocator, "__cstring", .{ .flags = macho.S_CSTRING_LITERALS, }); + needs_allocation = true; } break :blk .{ @@ -1185,6 +1216,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{ .flags = macho.S_LITERAL_POINTERS, }); + needs_allocation = true; } break :blk .{ @@ -1202,6 +1234,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{ .flags = macho.S_MOD_INIT_FUNC_POINTERS, }); + needs_allocation = 
true; } break :blk .{ @@ -1215,6 +1248,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{ .flags = macho.S_MOD_TERM_FUNC_POINTERS, }); + needs_allocation = true; } break :blk .{ @@ -1228,6 +1262,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try data_seg.addSection(self.base.allocator, "__bss", .{ .flags = macho.S_ZEROFILL, }); + needs_allocation = true; } break :blk .{ @@ -1241,6 +1276,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try data_seg.addSection(self.base.allocator, "__thread_vars", .{ .flags = macho.S_THREAD_LOCAL_VARIABLES, }); + needs_allocation = true; } break :blk .{ @@ -1254,6 +1290,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try data_seg.addSection(self.base.allocator, "__thread_data", .{ .flags = macho.S_THREAD_LOCAL_REGULAR, }); + needs_allocation = true; } break :blk .{ @@ -1267,6 +1304,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try data_seg.addSection(self.base.allocator, "__thread_bss", .{ .flags = macho.S_THREAD_LOCAL_ZEROFILL, }); + needs_allocation = true; } break :blk .{ @@ -1281,6 +1319,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.eh_frame_section_index == null) { self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); try text_seg.addSection(self.base.allocator, "__eh_frame", .{}); + needs_allocation = true; } break :blk .{ @@ -1293,6 +1332,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.data_const_section_index == null) { self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); try data_const_seg.addSection(self.base.allocator, "__const", .{}); + needs_allocation = true; } break :blk .{ @@ -1307,6 +1347,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio try text_seg.addSection(self.base.allocator, "__text", .{ .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }); + needs_allocation = true; } break :blk .{ @@ -1329,6 +1370,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.ustring_section_index == null) { self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); try text_seg.addSection(self.base.allocator, "__ustring", .{}); + needs_allocation = true; } break :blk .{ @@ -1339,6 +1381,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.gcc_except_tab_section_index == null) { self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{}); + needs_allocation = true; } break :blk .{ @@ -1349,6 +1392,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.objc_methlist_section_index == null) { self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); try text_seg.addSection(self.base.allocator, "__objc_methlist", .{}); + needs_allocation = true; } break :blk .{ @@ -1364,6 +1408,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.data_const_section_index == null) { self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); try data_const_seg.addSection(self.base.allocator, 
"__const", .{}); + needs_allocation = true; } break :blk .{ @@ -1374,6 +1419,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.text_const_section_index == null) { self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); try text_seg.addSection(self.base.allocator, "__const", .{}); + needs_allocation = true; } break :blk .{ @@ -1387,6 +1433,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.data_const_section_index == null) { self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); try data_const_seg.addSection(self.base.allocator, "__const", .{}); + needs_allocation = true; } break :blk .{ @@ -1400,6 +1447,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.data_const_section_index == null) { self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); try data_const_seg.addSection(self.base.allocator, "__const", .{}); + needs_allocation = true; } break :blk .{ @@ -1410,6 +1458,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.objc_cfstring_section_index == null) { self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); try data_const_seg.addSection(self.base.allocator, "__cfstring", .{}); + needs_allocation = true; } break :blk .{ @@ -1420,6 +1469,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.objc_classlist_section_index == null) { self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{}); + needs_allocation = true; } break :blk .{ @@ -1430,6 +1480,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.objc_imageinfo_section_index == null) { self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{}); + needs_allocation = true; } break :blk .{ @@ -1440,6 +1491,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.objc_const_section_index == null) { self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); try data_seg.addSection(self.base.allocator, "__objc_const", .{}); + needs_allocation = true; } break :blk .{ @@ -1450,6 +1502,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.objc_classrefs_section_index == null) { self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{}); + needs_allocation = true; } break :blk .{ @@ -1460,6 +1513,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.objc_data_section_index == null) { self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); try data_seg.addSection(self.base.allocator, "__objc_data", .{}); + needs_allocation = true; } break :blk .{ @@ -1470,6 +1524,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (self.data_section_index == null) { self.data_section_index = @intCast(u16, data_seg.sections.items.len); try data_seg.addSection(self.base.allocator, "__data", .{}); + needs_allocation = true; } break :blk .{ @@ -1494,6 +1549,36 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) 
!?MatchingSectio if (res) |match| { _ = try self.section_ordinals.getOrPut(self.base.allocator, match); _ = try self.block_free_lists.getOrPutValue(self.base.allocator, match, .{}); + + const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + if (!use_stage1) { + const target_seg = &self.load_commands.items[match.seg].Segment; + const target_sect = &target_seg.sections.items[match.sect]; + + // Update section's alignment + // TODO if sect.@"align" > target_sect.@"align", should we move the entire + // section to match the required alignment? + target_sect.@"align" = math.max(target_sect.@"align", sect.@"align"); + + if (needs_allocation) { + const alignment = try math.powi(u32, 2, target_sect.@"align"); + const needed_size = sect.size; + const off = target_seg.findFreeSpace(needed_size, alignment, self.header_pad); + assert(off + needed_size <= target_seg.inner.fileoff + target_seg.inner.filesize); // TODO expand + + log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ + segname, + sectname, + off, + off + needed_size, + }); + + target_sect.addr = target_seg.inner.vmaddr + off; + target_sect.size = needed_size; + target_sect.offset = @intCast(u32, off); + self.load_commands_dirty = true; + } + } } return res; @@ -1759,23 +1844,41 @@ pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: } pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { - // TODO converge with `allocateTextBlock` - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; const sym = &self.locals.items[atom.local_sym_index]; - const base_addr = if (self.blocks.get(match)) |last| blk: { + + var atom_placement: ?*TextBlock = null; + + // TODO converge with `allocateTextBlock` and handle free list + const vaddr = if (self.blocks.get(match)) |last| blk: { const last_atom_sym = self.locals.items[last.local_sym_index]; - break :blk last_atom_sym.n_value + last.size; + const ideal_capacity = padToIdeal(last.size); + const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity; + const last_atom_alignment = try math.powi(u32, 2, atom.alignment); + const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment); + atom_placement = last; + break :blk new_start_vaddr; } else sect.addr; - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment); + log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - const expand_section = true; + const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - // Expand the section, possibly shifting all the atoms for the sections following it. - // It might also be needed to shift entire segments too if there is not enough - // padding left. + const needed_size = (vaddr + atom.size) - sect.addr; + const end_addr = blk: { + const next_ordinal = self.section_ordinals.getIndex(match).?; // Ordinals are +1 to begin with. 
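// editor's note (not part of the patch): the lookup below finds the next
// section in ordinal order and uses its start address as the hard limit for
// growing the current section in place; when there is no following section,
// the segment's file size is used instead. Outgrowing that limit trips the
// assert below until section expansion is implemented (see the TODO).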
+ const end_addr = if (self.section_ordinals.keys().len > next_ordinal) inner: { + const next_match = self.section_ordinals.keys()[next_ordinal]; + const next_seg = self.load_commands.items[next_match.seg].Segment; + const next_sect = next_seg.sections.items[next_match.sect]; + break :inner next_sect.addr; + } else seg.inner.filesize; + break :blk end_addr; + }; + assert(needed_size <= end_addr); // TODO must expand the section + sect.size = needed_size; + self.load_commands_dirty = true; } const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); sym.n_value = vaddr; @@ -1828,6 +1931,21 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { try self.writeLocalSymbol(atom.local_sym_index); } +fn allocateGlobalSymbols(self: *MachO) !void { + // TODO should we do this in `allocateAtom` (or similar)? Then, we would need to + // store the link atom -> globals somewhere. + var sym_it = self.symbol_resolver.valueIterator(); + while (sym_it.next()) |resolv| { + if (resolv.where != .global) continue; + + assert(resolv.local_sym_index != 0); + const local_sym = self.locals.items[resolv.local_sym_index]; + const sym = &self.globals.items[resolv.where_index]; + sym.n_value = local_sym.n_value; + sym.n_sect = local_sym.n_sect; + } +} + pub fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { // Update target section's metadata // TODO should we update segment's size here too? @@ -2313,14 +2431,14 @@ fn resolveSymbolsInObject( continue; }, .undef => { - const undef = &self.undefs.items[resolv.where_index]; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; + // const undef = &self.undefs.items[resolv.where_index]; + // undef.* = .{ + // .n_strx = 0, + // .n_type = macho.N_UNDF, + // .n_sect = 0, + // .n_desc = 0, + // .n_value = 0, + // }; _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } @@ -2457,18 +2575,9 @@ fn resolveSymbols(self: *MachO) !void { // text blocks for each tentative defintion. while (tentatives.popOrNull()) |entry| { const sym = &self.globals.items[entry.key]; - const match: MatchingSection = blk: { - if (self.bss_section_index == null) { - const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - } - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, }; _ = try self.section_ordinals.getOrPut(self.base.allocator, match); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index d90e3837b5..ec9a4901fe 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -504,7 +504,6 @@ pub fn parseTextBlocks( log.debug("unhandled section", .{}); continue; }; - // TODO allocate section here. 
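// editor's note (not part of the patch): the TODO removed above is resolved
// rather than deferred -- section space is now reserved inside
// getMatchingSection() (the needs_allocation path added earlier in this
// patch), so parseTextBlocks() only maps input sections to output sections.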
// Read section's code var code = try allocator.alloc(u8, @intCast(usize, sect.size)); @@ -569,12 +568,6 @@ pub fn parseTextBlocks( const block_size = block_code.len; const block = try macho_file.createEmptyAtom(block_local_sym_index, block_size, sect.@"align"); - if (use_stage1) { - try macho_file.allocateAtomStage1(block, match); - } else { - _ = try macho_file.allocateAtom(block, match); - } - mem.copy(u8, block.code.items, block_code); try block.parseRelocs(relocs, .{ @@ -597,6 +590,11 @@ pub fn parseTextBlocks( } } + if (use_stage1) { + try macho_file.allocateAtomStage1(block, match); + } else { + _ = try macho_file.allocateAtom(block, match); + } try self.text_blocks.append(allocator, block); } @@ -648,7 +646,6 @@ pub fn parseTextBlocks( } else { _ = try macho_file.allocateAtom(block, match); } - try self.text_blocks.append(allocator, block); } @@ -679,12 +676,6 @@ pub fn parseTextBlocks( }; const block = try macho_file.createEmptyAtom(block_local_sym_index, sect.size, sect.@"align"); - if (use_stage1) { - try macho_file.allocateAtomStage1(block, match); - } else { - _ = try macho_file.allocateAtom(block, match); - } - mem.copy(u8, block.code.items, code); try block.parseRelocs(relocs, .{ @@ -743,6 +734,11 @@ pub fn parseTextBlocks( }); } + if (use_stage1) { + try macho_file.allocateAtomStage1(block, match); + } else { + _ = try macho_file.allocateAtom(block, match); + } try self.text_blocks.append(allocator, block); } } diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index e2552a8b4f..d753fe29f4 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -1183,9 +1183,22 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { }, .undef => { const atom = macho_file.stubs_map.get(rel.where_index) orelse { + // TODO this is required for incremental when we don't have every symbol + // resolved when creating relocations. In this case, we will insert a branch + // reloc to an undef symbol which may happen to be defined within the binary. + // Then, the undef we point at will be a null symbol (free symbol) which we + // should remove/repurpose. To circumvent this (for now), we check if the symbol + // we point to is garbage, and if so we fall back to symbol resolver to find by name. + const n_strx = macho_file.undefs.items[rel.where_index].n_strx; + if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: { + if (resolv.where != .global) break :inner; + break :blk macho_file.globals.items[resolv.where_index].n_value; + } + // TODO verify in TextBlock that the symbol is indeed dynamically bound. break :blk 0; // Dynamically bound by dyld. 
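// editor's note (not part of the patch): the fallback added above re-resolves
// the symbol by name (n_strx) because, during incremental linking, a branch
// reloc may target an undef that is later satisfied by a global defined in
// the binary; only genuinely unresolved symbols fall through to 0 and are
// left for dyld to bind.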
                 };
+                break :blk macho_file.locals.items[atom.local_sym_index].n_value;
             },
         }
diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig
index 6e75af08c4..b50ce95acf 100644
--- a/src/link/MachO/commands.zig
+++ b/src/link/MachO/commands.zig
@@ -337,7 +337,7 @@ pub const SegmentCommand = struct {
         return null;
     }
 
-    pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u16, start: ?u64) u64 {
+    pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u32, start: ?u64) u64 {
         var st: u64 = if (start) |v| v else self.inner.fileoff;
         while (self.detectAllocCollision(st, object_size)) |item_end| {
             st = mem.alignForwardGeneric(u64, item_end, min_alignment);

From 50db993119d9b1031700be94b757eaf100f35857 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 31 Aug 2021 23:05:01 +0200
Subject: [PATCH 37/78] macho: fix allocating sections within segment when
 parsing objects

---
 lib/std/macho.zig               |  40 +-
 src/link/MachO.zig              | 852 +++++++++++++++++---------------
 src/link/MachO/DebugSymbols.zig | 171 +++----
 src/link/MachO/commands.zig     |  93 +---
 4 files changed, 552 insertions(+), 604 deletions(-)

diff --git a/lib/std/macho.zig b/lib/std/macho.zig
index 3c41b65522..b4d7964b5e 100644
--- a/lib/std/macho.zig
+++ b/lib/std/macho.zig
@@ -601,35 +601,35 @@ pub const segment_command = extern struct {
 /// command and their size is reflected in cmdsize.
 pub const segment_command_64 = extern struct {
     /// LC_SEGMENT_64
-    cmd: u32,
+    cmd: u32 = LC_SEGMENT_64,
 
     /// includes sizeof section_64 structs
-    cmdsize: u32,
+    cmdsize: u32 = @sizeOf(segment_command_64),
 
     /// segment name
     segname: [16]u8,
 
     /// memory address of this segment
-    vmaddr: u64,
+    vmaddr: u64 = 0,
 
     /// memory size of this segment
-    vmsize: u64,
+    vmsize: u64 = 0,
 
     /// file offset of this segment
-    fileoff: u64,
+    fileoff: u64 = 0,
 
     /// amount to map from the file
-    filesize: u64,
+    filesize: u64 = 0,
 
     /// maximum VM protection
-    maxprot: vm_prot_t,
+    maxprot: vm_prot_t = VM_PROT_NONE,
 
     /// initial VM protection
-    initprot: vm_prot_t,
+    initprot: vm_prot_t = VM_PROT_NONE,
 
     /// number of sections in segment
-    nsects: u32,
-    flags: u32,
+    nsects: u32 = 0,
+    flags: u32 = 0,
 };
 
 /// A segment is made up of zero or more sections. Non-MH_OBJECT files have
@@ -700,34 +700,34 @@ pub const section_64 = extern struct {
     segname: [16]u8,
 
     /// memory address of this section
-    addr: u64,
+    addr: u64 = 0,
 
     /// size in bytes of this section
-    size: u64,
+    size: u64 = 0,
 
     /// file offset of this section
-    offset: u32,
+    offset: u32 = 0,
 
     /// section alignment (power of 2)
-    @"align": u32,
+    @"align": u32 = 0,
 
     /// file offset of relocation entries
-    reloff: u32,
+    reloff: u32 = 0,
 
     /// number of relocation entries
-    nreloc: u32,
+    nreloc: u32 = 0,
 
     /// flags (section type and attributes
-    flags: u32,
+    flags: u32 = S_REGULAR,
 
     /// reserved (for offset or index)
-    reserved1: u32,
+    reserved1: u32 = 0,
 
     /// reserved (for count or sizeof)
-    reserved2: u32,
+    reserved2: u32 = 0,
 
     /// reserved
-    reserved3: u32,
+    reserved3: u32 = 0,
 };
 
 pub const nlist = extern struct {
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 1def32c41a..38446db5a6 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -1133,20 +1133,19 @@ pub const MatchingSection = struct {
 };
 
 pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection {
-    const text_seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-    const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
-    const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
     const segname = commands.segmentName(sect);
     const sectname = commands.sectionName(sect);
-
-    var needs_allocation = false;
     const res: ?MatchingSection = blk: {
         switch (commands.sectionType(sect)) {
             macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
                 if (self.text_const_section_index == null) {
-                    self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
-                    try text_seg.addSection(self.base.allocator, "__const", .{});
-                    needs_allocation = true;
+                    self.text_const_section_index = try self.allocateSection(
+                        self.text_segment_cmd_index.?,
+                        "__const",
+                        sect.size,
+                        sect.@"align",
+                        .{},
+                    );
                 }
 
                 break :blk .{
@@ -1159,11 +1158,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 // TODO it seems the common values within the sections in objects are deduplicated/merged
                 // on merging the sections' contents.
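// editor's note (not part of the patch): every case in this switch is being
// rewritten to the same shape; a minimal sketch, where "__foo" and
// foo_section_index stand in for the concrete section of each case:
//
//     if (self.foo_section_index == null) {
//         self.foo_section_index = try self.allocateSection(
//             self.some_segment_cmd_index.?,
//             "__foo",
//             sect.size,
//             sect.@"align",
//             .{ .flags = ... }, // section type/attribute flags, if any
//         );
//     }
//     break :blk .{ .seg = self.some_segment_cmd_index.?, .sect = self.foo_section_index.? };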
if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methname", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - needs_allocation = true; + self.objc_methname_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methname", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1172,11 +1173,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_methtype")) { if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methtype", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - needs_allocation = true; + self.objc_methtype_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methtype", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1185,9 +1188,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classname")) { if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_classname", .{}); - needs_allocation = true; + self.objc_classname_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_classname", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1197,11 +1204,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio } if (self.cstring_section_index == null) { - self.cstring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__cstring", .{ - .flags = macho.S_CSTRING_LITERALS, - }); - needs_allocation = true; + self.cstring_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__cstring", + sect.size, + sect.@"align", + .{ + .flags = macho.S_CSTRING_LITERALS, + }, + ); } break :blk .{ @@ -1212,29 +1223,37 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio macho.S_LITERAL_POINTERS => { if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{ - .flags = macho.S_LITERAL_POINTERS, - }); - needs_allocation = true; + self.objc_selrefs_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_selrefs", + sect.size, + sect.@"align", + .{ + .flags = macho.S_LITERAL_POINTERS, + }, + ); } break :blk .{ .seg = self.data_segment_cmd_index.?, .sect = self.objc_selrefs_section_index.?, }; + } else { + // TODO investigate + break :blk null; } - - // TODO investigate - break :blk null; }, macho.S_MOD_INIT_FUNC_POINTERS => { if (self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }); - needs_allocation = true; + self.mod_init_func_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__mod_init_func", + sect.size, + sect.@"align", + .{ + .flags = 
macho.S_MOD_INIT_FUNC_POINTERS, + }, + ); } break :blk .{ @@ -1244,11 +1263,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_MOD_TERM_FUNC_POINTERS => { if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }); - needs_allocation = true; + self.mod_term_func_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__mod_term_func", + sect.size, + sect.@"align", + .{ + .flags = macho.S_MOD_TERM_FUNC_POINTERS, + }, + ); } break :blk .{ @@ -1258,11 +1281,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_ZEROFILL => { if (self.bss_section_index == null) { - self.bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__bss", .{ - .flags = macho.S_ZEROFILL, - }); - needs_allocation = true; + self.bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__bss", + sect.size, + sect.@"align", + .{ + .flags = macho.S_ZEROFILL, + }, + ); } break :blk .{ @@ -1272,11 +1299,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_THREAD_LOCAL_VARIABLES => { if (self.tlv_section_index == null) { - self.tlv_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_vars", .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }); - needs_allocation = true; + self.tlv_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_vars", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }, + ); } break :blk .{ @@ -1286,11 +1317,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_THREAD_LOCAL_REGULAR => { if (self.tlv_data_section_index == null) { - self.tlv_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_data", .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }); - needs_allocation = true; + self.tlv_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_data", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }, + ); } break :blk .{ @@ -1300,11 +1335,15 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, macho.S_THREAD_LOCAL_ZEROFILL => { if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__thread_bss", .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }); - needs_allocation = true; + self.tlv_bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_bss", + sect.size, + sect.@"align", + .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }, + ); } break :blk .{ @@ -1317,9 +1356,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // TODO I believe __eh_frame is currently part of __unwind_info section // in the latest ld64 output. 
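// editor's note (not part of the patch): ld64 typically compacts DWARF CFI
// into a __TEXT,__unwind_info section and keeps only the frames it cannot
// encode compactly; carrying __eh_frame through verbatim, as done here, is
// the simpler placeholder the TODO above refers to.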
if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__eh_frame", .{}); - needs_allocation = true; + self.eh_frame_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__eh_frame", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1330,9 +1373,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // TODO audit this: is this the right mapping? if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); - needs_allocation = true; + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1343,11 +1390,17 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio macho.S_REGULAR => { if (commands.sectionIsCode(sect)) { if (self.text_section_index == null) { - self.text_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__text", .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - needs_allocation = true; + self.text_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__text", + sect.size, + sect.@"align", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } break :blk .{ @@ -1368,9 +1421,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__TEXT")) { if (mem.eql(u8, sectname, "__ustring")) { if (self.ustring_section_index == null) { - self.ustring_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__ustring", .{}); - needs_allocation = true; + self.ustring_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__ustring", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1379,9 +1436,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{}); - needs_allocation = true; + self.gcc_except_tab_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__gcc_except_tab", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1390,9 +1451,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_methlist")) { if (self.objc_methlist_section_index == null) { - self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__objc_methlist", .{}); - needs_allocation = true; + self.objc_methlist_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__objc_methlist", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1406,9 +1471,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio mem.eql(u8, sectname, "__gopclntab")) { if (self.data_const_section_index == null) { - 
self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); - needs_allocation = true; + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1417,9 +1486,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else { if (self.text_const_section_index == null) { - self.text_const_section_index = @intCast(u16, text_seg.sections.items.len); - try text_seg.addSection(self.base.allocator, "__const", .{}); - needs_allocation = true; + self.text_const_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1431,9 +1504,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__DATA_CONST")) { if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); - needs_allocation = true; + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1445,9 +1522,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio if (mem.eql(u8, segname, "__DATA")) { if (mem.eql(u8, sectname, "__const")) { if (self.data_const_section_index == null) { - self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__const", .{}); - needs_allocation = true; + self.data_const_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1456,9 +1537,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__cfstring")) { if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__cfstring", .{}); - needs_allocation = true; + self.objc_cfstring_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__cfstring", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1467,9 +1552,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classlist")) { if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{}); - needs_allocation = true; + self.objc_classlist_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__objc_classlist", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1478,9 +1567,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { if (self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len); - try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{}); - needs_allocation = true; + 
self.objc_imageinfo_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__objc_imageinfo", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1489,9 +1582,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_const")) { if (self.objc_const_section_index == null) { - self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_const", .{}); - needs_allocation = true; + self.objc_const_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_const", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1500,9 +1597,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_classrefs")) { if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{}); - needs_allocation = true; + self.objc_classrefs_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_classrefs", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1511,9 +1612,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else if (mem.eql(u8, sectname, "__objc_data")) { if (self.objc_data_section_index == null) { - self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__objc_data", .{}); - needs_allocation = true; + self.objc_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__objc_data", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1522,9 +1627,13 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }; } else { if (self.data_section_index == null) { - self.data_section_index = @intCast(u16, data_seg.sections.items.len); - try data_seg.addSection(self.base.allocator, "__data", .{}); - needs_allocation = true; + self.data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__data", + sect.size, + sect.@"align", + .{}, + ); } break :blk .{ @@ -1545,42 +1654,6 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio else => break :blk null, } }; - - if (res) |match| { - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - _ = try self.block_free_lists.getOrPutValue(self.base.allocator, match, .{}); - - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - if (!use_stage1) { - const target_seg = &self.load_commands.items[match.seg].Segment; - const target_sect = &target_seg.sections.items[match.sect]; - - // Update section's alignment - // TODO if sect.@"align" > target_sect.@"align", should we move the entire - // section to match the required alignment? 
- target_sect.@"align" = math.max(target_sect.@"align", sect.@"align"); - - if (needs_allocation) { - const alignment = try math.powi(u32, 2, target_sect.@"align"); - const needed_size = sect.size; - const off = target_seg.findFreeSpace(needed_size, alignment, self.header_pad); - assert(off + needed_size <= target_seg.inner.fileoff + target_seg.inner.filesize); // TODO expand - - log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ - segname, - sectname, - off, - off + needed_size, - }); - - target_sect.addr = target_seg.inner.vmaddr + off; - target_sect.size = needed_size; - target_sect.offset = @intCast(u32, off); - self.load_commands_dirty = true; - } - } - } - return res; } @@ -3878,9 +3951,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.pagezero_segment_cmd_index == null) { self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__PAGEZERO", .{ - .vmsize = 0x100000000, // size always set to 4GB - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__PAGEZERO"), + .vmsize = 0x100000000, // size always set to 4GB + }, + }, }); self.load_commands_dirty = true; } @@ -3895,51 +3971,39 @@ pub fn populateMissingMetadata(self: *MachO) !void { log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__TEXT", .{ - .vmaddr = 0x100000000, // always starts at 4GB - .vmsize = needed_size, - .filesize = needed_size, - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__TEXT"), + .vmaddr = 0x100000000, // always starts at 4GB + .vmsize = needed_size, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, + }, + }, }); self.load_commands_dirty = true; } if (self.text_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.text_section_index = @intCast(u16, text_segment.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { .x86_64 => 0, .aarch64 => 2, else => unreachable, // unhandled architecture type }; const needed_size = self.base.options.program_code_size_hint; - const off = text_segment.findFreeSpace(needed_size, @as(u16, 1) << alignment, self.header_pad); - - log.debug("found __text section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__text", .{ - .addr = text_segment.inner.vmaddr + off, - .size = @intCast(u32, needed_size), - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + self.text_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__text", + needed_size, + alignment, + .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } if (self.stubs_section_index == null) { - const 
text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stubs_section_index = @intCast(u16, text_segment.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { .x86_64 => 0, .aarch64 => 2, @@ -3951,32 +4015,19 @@ pub fn populateMissingMetadata(self: *MachO) !void { else => unreachable, // unhandled architecture type }; const needed_size = stub_size * self.base.options.symbol_count_hint; - const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); - assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. - - log.debug("found __stubs section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__stubs", .{ - .addr = text_segment.inner.vmaddr + off, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }); - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + self.stubs_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__stubs", + needed_size, + alignment, + .{ + .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }, + ); } if (self.stub_helper_section_index == null) { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - self.stub_helper_section_index = @intCast(u16, text_segment.sections.items.len); - const alignment: u2 = switch (self.base.options.target.cpu.arch) { .x86_64 => 0, .aarch64 => 2, @@ -3993,25 +4044,15 @@ pub fn populateMissingMetadata(self: *MachO) !void { else => unreachable, }; const needed_size = stub_size * self.base.options.symbol_count_hint + preamble_size; - const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); - assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. 
- - log.debug("found __stub_helper section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try text_segment.addSection(self.base.allocator, "__stub_helper", .{ - .addr = text_segment.inner.vmaddr + off, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = alignment, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + self.stub_helper_section_index = try self.allocateSection( + self.text_segment_cmd_index.?, + "__stub_helper", + needed_size, + alignment, + .{ + .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); } if (self.data_const_segment_cmd_index == null) { @@ -4020,45 +4061,39 @@ pub fn populateMissingMetadata(self: *MachO) !void { const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); - log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); + log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ + address_and_offset.offset, + address_and_offset.offset + needed_size, + }); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA_CONST", .{ - .vmaddr = address_and_offset.address, - .vmsize = needed_size, - .fileoff = address_and_offset.offset, - .filesize = needed_size, - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = address_and_offset.address, + .vmsize = needed_size, + .fileoff = address_and_offset.offset, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }, + }, }); self.load_commands_dirty = true; } if (self.got_section_index == null) { - const dc_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - self.got_section_index = @intCast(u16, dc_segment.sections.items.len); - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = dc_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= dc_segment.inner.fileoff + dc_segment.inner.filesize); // TODO Must expand __DATA_CONST segment. 
- - log.debug("found __got section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try dc_segment.addSection(self.base.allocator, "__got", .{ - .addr = dc_segment.inner.vmaddr + off - dc_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - const match = MatchingSection{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.got_section_index = try self.allocateSection( + self.data_const_segment_cmd_index.?, + "__got", + needed_size, + alignment, + .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }, + ); } if (self.data_segment_cmd_index == null) { @@ -4070,175 +4105,115 @@ pub fn populateMissingMetadata(self: *MachO) !void { log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__DATA", .{ - .vmaddr = address_and_offset.address, - .vmsize = needed_size, - .fileoff = address_and_offset.offset, - .filesize = needed_size, - .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DATA"), + .vmaddr = address_and_offset.address, + .vmsize = needed_size, + .fileoff = address_and_offset.offset, + .filesize = needed_size, + .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + .initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE, + }, + }, }); self.load_commands_dirty = true; } if (self.la_symbol_ptr_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.la_symbol_ptr_section_index = @intCast(u16, data_segment.sections.items.len); - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. 
- - log.debug("found __la_symbol_ptr section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try data_segment.addSection(self.base.allocator, "__la_symbol_ptr", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.la_symbol_ptr_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__la_symbol_ptr", + needed_size, + alignment, + .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }, + ); } if (self.data_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.data_section_index = @intCast(u16, data_segment.sections.items.len); - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. - - log.debug("found __data section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try data_segment.addSection(self.base.allocator, "__data", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - }); - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__data", + needed_size, + alignment, + .{}, + ); } if (self.tlv_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.tlv_section_index = @intCast(u16, data_segment.sections.items.len); - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. 
- - log.debug("found __thread_vars section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try data_segment.addSection(self.base.allocator, "__thread_vars", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }); - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_vars", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_VARIABLES, + }, + ); } if (self.tlv_data_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.tlv_data_section_index = @intCast(u16, data_segment.sections.items.len); - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. - - log.debug("found __thread_data section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - - try data_segment.addSection(self.base.allocator, "__thread_data", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .offset = @intCast(u32, off), - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_THREAD_LOCAL_REGULAR, - }); - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_data_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_data_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_data", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_REGULAR, + }, + ); } if (self.tlv_bss_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.tlv_bss_section_index = @intCast(u16, data_segment.sections.items.len); - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. - - log.debug("found __thread_bss section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.tlv_bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__thread_bss", + needed_size, + alignment, + .{ + .flags = macho.S_THREAD_LOCAL_ZEROFILL, + }, + ); // We keep offset to the section in a separate variable as the actual section is usually pointing at the // beginning of the file. 
- self.tlv_bss_file_offset = off; - try data_segment.addSection(self.base.allocator, "__thread_bss", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = needed_size, - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }); - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_bss_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const out_sect = &seg.sections.items[self.tlv_bss_section_index.?]; + self.tlv_bss_file_offset = out_sect.offset; + out_sect.offset = 0; } if (self.bss_section_index == null) { - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - self.bss_section_index = @intCast(u16, data_segment.sections.items.len); - const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); - assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. - - log.debug("found __bss section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + const alignment: u16 = 3; // 2^3 = @sizeOf(u64) + self.bss_section_index = try self.allocateSection( + self.data_segment_cmd_index.?, + "__bss", + needed_size, + alignment, + .{ + .flags = macho.S_ZEROFILL, + }, + ); // We keep offset to the section in a separate variable as the actual section is usually pointing at the // beginning of the file. - self.bss_file_offset = off; - try data_segment.addSection(self.base.allocator, "__bss", .{ - .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, - .size = 0, - .@"align" = 3, // 2^3 = @sizeOf(u64) - .flags = macho.S_ZEROFILL, - }); - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - self.load_commands_dirty = true; + const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const out_sect = &seg.sections.items[self.bss_section_index.?]; + self.bss_file_offset = out_sect.offset; + out_sect.offset = 0; } if (self.linkedit_segment_cmd_index == null) { @@ -4248,12 +4223,15 @@ pub fn populateMissingMetadata(self: *MachO) !void { log.debug("found __LINKEDIT segment free space at 0x{x}", .{address_and_offset.offset}); try self.load_commands.append(self.base.allocator, .{ - .Segment = SegmentCommand.empty("__LINKEDIT", .{ - .vmaddr = address_and_offset.address, - .fileoff = address_and_offset.offset, - .maxprot = macho.VM_PROT_READ, - .initprot = macho.VM_PROT_READ, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = address_and_offset.address, + .fileoff = address_and_offset.offset, + .maxprot = macho.VM_PROT_READ, + .initprot = macho.VM_PROT_READ, + }, + }, }); self.load_commands_dirty = true; } @@ -4485,6 +4463,67 @@ pub fn populateMissingMetadata(self: *MachO) !void { } } +const AllocateSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, +}; + +fn allocateSection( + self: *MachO, + segment_id: u16, + sectname: []const u8, + size: u64, + alignment: u32, + opts: 
AllocateSectionOpts, +) !u16 { + const seg = &self.load_commands.items[segment_id].Segment; + var sect = macho.section_64{ + .sectname = makeStaticString(sectname), + .segname = seg.inner.segname, + .size = @intCast(u32, size), + .@"align" = alignment, + .flags = opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }; + + const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + if (!use_stage1) { + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) self.header_pad else null; + const off = seg.findFreeSpace(size, alignment_pow_2, padding); + + assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand + + log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ + commands.segmentName(sect), + commands.sectionName(sect), + off, + off + size, + }); + + sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.offset = @intCast(u32, off); + } + + const index = @intCast(u16, seg.sections.items.len); + try seg.sections.append(self.base.allocator, sect); + seg.inner.cmdsize += @sizeOf(macho.section_64); + seg.inner.nsects += 1; + + const match = MatchingSection{ + .seg = segment_id, + .sect = index, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); + + self.load_commands_dirty = true; + + return index; +} + fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_section = &text_segment.sections.items[self.text_section_index.?]; @@ -5513,6 +5552,13 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { std.math.maxInt(@TypeOf(actual_size)); } +pub fn makeStaticString(bytes: []const u8) [16]u8 { + var buf = [_]u8{0} ** 16; + assert(bytes.len <= buf.len); + mem.copy(u8, &buf, bytes); + return buf; +} + pub fn makeString(self: *MachO, string: []const u8) !u32 { if (self.strtab_dir.getAdapted(@as([]const u8, string), StringSliceAdapter{ .strtab = &self.strtab })) |off| { log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 025959793e..450d842134 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -5,23 +5,26 @@ const assert = std.debug.assert; const fs = std.fs; const log = std.log.scoped(.dsym); const macho = std.macho; +const math = std.math; const mem = std.mem; const DW = std.dwarf; const leb = std.leb; const Allocator = mem.Allocator; const build_options = @import("build_options"); +const commands = @import("commands.zig"); const trace = @import("../../tracy.zig").trace; +const LoadCommand = commands.LoadCommand; const Module = @import("../../Module.zig"); const Type = @import("../../type.zig").Type; const link = @import("../../link.zig"); const MachO = @import("../MachO.zig"); -const SrcFn = MachO.SrcFn; const TextBlock = MachO.TextBlock; +const SegmentCommand = commands.SegmentCommand; +const SrcFn = MachO.SrcFn; +const makeStaticString = MachO.makeStaticString; const padToIdeal = MachO.padToIdeal; -usingnamespace @import("commands.zig"); - const page_size: u16 = 0x1000; base: *MachO, @@ -185,107 +188,86 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ off, off 
+ needed_size }); try self.load_commands.append(allocator, .{ - .Segment = SegmentCommand.empty("__DWARF", .{ - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = off, - .filesize = needed_size, - }), + .Segment = .{ + .inner = .{ + .segname = makeStaticString("__DWARF"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = off, + .filesize = needed_size, + }, + }, }); self.load_commands_dirty = true; } if (self.debug_str_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_str_section_index = @intCast(u16, dwarf_segment.sections.items.len); assert(self.debug_string_table.items.len == 0); - - try dwarf_segment.addSection(allocator, "__debug_str", .{ - .addr = dwarf_segment.inner.vmaddr, - .size = @intCast(u32, self.debug_string_table.items.len), - .offset = @intCast(u32, dwarf_segment.inner.fileoff), - .@"align" = 1, - }); - self.load_commands_dirty = true; + self.debug_str_section_index = try self.allocateSection( + "__debug_str", + @intCast(u32, self.debug_string_table.items.len), + 0, + ); self.debug_string_table_dirty = true; } if (self.debug_info_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_info_section_index = @intCast(u16, dwarf_segment.sections.items.len); - - const file_size_hint = 200; - const p_align = 1; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - - log.debug("found __debug_info free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); - - try dwarf_segment.addSection(allocator, "__debug_info", .{ - .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, - .size = file_size_hint, - .offset = @intCast(u32, off), - .@"align" = p_align, - }); - self.load_commands_dirty = true; + self.debug_info_section_index = try self.allocateSection("__debug_info", 200, 0); self.debug_info_header_dirty = true; } if (self.debug_abbrev_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_abbrev_section_index = @intCast(u16, dwarf_segment.sections.items.len); - - const file_size_hint = 128; - const p_align = 1; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - - log.debug("found __debug_abbrev free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); - - try dwarf_segment.addSection(allocator, "__debug_abbrev", .{ - .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, - .size = file_size_hint, - .offset = @intCast(u32, off), - .@"align" = p_align, - }); - self.load_commands_dirty = true; + self.debug_abbrev_section_index = try self.allocateSection("__debug_abbrev", 128, 0); self.debug_abbrev_section_dirty = true; } if (self.debug_aranges_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_aranges_section_index = @intCast(u16, dwarf_segment.sections.items.len); - - const file_size_hint = 160; - const p_align = 16; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - - log.debug("found __debug_aranges free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); - - try dwarf_segment.addSection(allocator, "__debug_aranges", .{ - .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, - .size = file_size_hint, - .offset = @intCast(u32, off), - .@"align" = p_align, - }); - self.load_commands_dirty = true; + self.debug_aranges_section_index = try 
self.allocateSection("__debug_aranges", 160, 4); self.debug_aranges_section_dirty = true; } if (self.debug_line_section_index == null) { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; - self.debug_line_section_index = @intCast(u16, dwarf_segment.sections.items.len); - - const file_size_hint = 250; - const p_align = 1; - const off = dwarf_segment.findFreeSpace(file_size_hint, p_align, null); - - log.debug("found __debug_line free space 0x{x} to 0x{x}", .{ off, off + file_size_hint }); - - try dwarf_segment.addSection(allocator, "__debug_line", .{ - .addr = dwarf_segment.inner.vmaddr + off - dwarf_segment.inner.fileoff, - .size = file_size_hint, - .offset = @intCast(u32, off), - .@"align" = p_align, - }); - self.load_commands_dirty = true; + self.debug_line_section_index = try self.allocateSection("__debug_line", 250, 0); self.debug_line_header_dirty = true; } } +fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u16 { + const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].Segment; + var sect = macho.section_64{ + .sectname = makeStaticString(sectname), + .segname = seg.inner.segname, + .size = @intCast(u32, size), + .@"align" = alignment, + }; + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const off = seg.findFreeSpace(size, alignment_pow_2, null); + + assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand + + log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ + commands.segmentName(sect), + commands.sectionName(sect), + off, + off + size, + }); + + sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.offset = @intCast(u32, off); + + const index = @intCast(u16, seg.sections.items.len); + try seg.sections.append(self.base.base.allocator, sect); + seg.inner.cmdsize += @sizeOf(macho.section_64); + seg.inner.nsects += 1; + + // TODO + // const match = MatchingSection{ + // .seg = segment_id, + // .sect = index, + // }; + // _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + // try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); + + self.load_commands_dirty = true; + + return index; +} + pub fn flushModule(self: *DebugSymbols, allocator: *Allocator, options: link.Options) !void { // TODO This linker code currently assumes there is only 1 compilation unit and it corresponds to the // Zig source code. 
@@ -611,15 +593,18 @@ pub fn deinit(self: *DebugSymbols, allocator: *Allocator) void { } fn copySegmentCommand(self: *DebugSymbols, allocator: *Allocator, base_cmd: SegmentCommand) !SegmentCommand { - var cmd = SegmentCommand.empty("", .{ - .cmdsize = base_cmd.inner.cmdsize, - .vmaddr = base_cmd.inner.vmaddr, - .vmsize = base_cmd.inner.vmsize, - .maxprot = base_cmd.inner.maxprot, - .initprot = base_cmd.inner.initprot, - .nsects = base_cmd.inner.nsects, - .flags = base_cmd.inner.flags, - }); + var cmd = SegmentCommand{ + .inner = .{ + .segname = undefined, + .cmdsize = base_cmd.inner.cmdsize, + .vmaddr = base_cmd.inner.vmaddr, + .vmsize = base_cmd.inner.vmsize, + .maxprot = base_cmd.inner.maxprot, + .initprot = base_cmd.inner.initprot, + .nsects = base_cmd.inner.nsects, + .flags = base_cmd.inner.flags, + }, + }; mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname); try cmd.sections.ensureCapacity(allocator, cmd.inner.nsects); @@ -689,7 +674,7 @@ fn writeLoadCommands(self: *DebugSymbols, allocator: *Allocator) !void { } fn writeHeader(self: *DebugSymbols) !void { - var header = emptyHeader(.{ + var header = commands.emptyHeader(.{ .filetype = macho.MH_DSYM, }); diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index b50ce95acf..25154e2d5b 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -9,6 +9,7 @@ const assert = std.debug.assert; const Allocator = std.mem.Allocator; const MachO = @import("../MachO.zig"); +const makeStaticString = MachO.makeStaticString; const padToIdeal = MachO.padToIdeal; pub const HeaderArgs = struct { @@ -217,75 +218,6 @@ pub const SegmentCommand = struct { inner: macho.segment_command_64, sections: std.ArrayListUnmanaged(macho.section_64) = .{}, - const SegmentOptions = struct { - cmdsize: u32 = @sizeOf(macho.segment_command_64), - vmaddr: u64 = 0, - vmsize: u64 = 0, - fileoff: u64 = 0, - filesize: u64 = 0, - maxprot: macho.vm_prot_t = macho.VM_PROT_NONE, - initprot: macho.vm_prot_t = macho.VM_PROT_NONE, - nsects: u32 = 0, - flags: u32 = 0, - }; - - pub fn empty(comptime segname: []const u8, opts: SegmentOptions) SegmentCommand { - return .{ - .inner = .{ - .cmd = macho.LC_SEGMENT_64, - .cmdsize = opts.cmdsize, - .segname = makeStaticString(segname), - .vmaddr = opts.vmaddr, - .vmsize = opts.vmsize, - .fileoff = opts.fileoff, - .filesize = opts.filesize, - .maxprot = opts.maxprot, - .initprot = opts.initprot, - .nsects = opts.nsects, - .flags = opts.flags, - }, - }; - } - - const SectionOptions = struct { - addr: u64 = 0, - size: u64 = 0, - offset: u32 = 0, - @"align": u32 = 0, - reloff: u32 = 0, - nreloc: u32 = 0, - flags: u32 = macho.S_REGULAR, - reserved1: u32 = 0, - reserved2: u32 = 0, - reserved3: u32 = 0, - }; - - pub fn addSection( - self: *SegmentCommand, - alloc: *Allocator, - comptime sectname: []const u8, - opts: SectionOptions, - ) !void { - var section = macho.section_64{ - .sectname = makeStaticString(sectname), - .segname = undefined, - .addr = opts.addr, - .size = opts.size, - .offset = opts.offset, - .@"align" = opts.@"align", - .reloff = opts.reloff, - .nreloc = opts.nreloc, - .flags = opts.flags, - .reserved1 = opts.reserved1, - .reserved2 = opts.reserved2, - .reserved3 = opts.reserved3, - }; - mem.copy(u8, &section.segname, &self.inner.segname); - try self.sections.append(alloc, section); - self.inner.cmdsize += @sizeOf(macho.section_64); - self.inner.nsects += 1; - } - pub fn read(alloc: *Allocator, reader: anytype) !SegmentCommand { const inner = try reader.readStruct(macho.segment_command_64); 
var segment = SegmentCommand{ @@ -427,13 +359,6 @@ pub fn createLoadDylibCommand( return dylib_cmd; } -fn makeStaticString(bytes: []const u8) [16]u8 { - var buf = [_]u8{0} ** 16; - assert(bytes.len <= buf.len); - mem.copy(u8, &buf, bytes); - return buf; -} - fn parseName(name: *const [16]u8) []const u8 { const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len; return name[0..len]; @@ -513,34 +438,26 @@ test "read-write segment command" { 0x00, 0x00, 0x00, 0x00, // reserved3 }; var cmd = SegmentCommand{ - .inner = .{ - .cmd = macho.LC_SEGMENT_64, + .inner = macho.segment_command_64.new(.{ .cmdsize = 152, .segname = makeStaticString("__TEXT"), .vmaddr = 4294967296, .vmsize = 294912, - .fileoff = 0, .filesize = 294912, .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE, .initprot = macho.VM_PROT_EXECUTE | macho.VM_PROT_READ, .nsects = 1, - .flags = 0, - }, + }), }; - try cmd.sections.append(gpa, .{ + try cmd.sections.append(gpa, macho.section_64.new(.{ .sectname = makeStaticString("__text"), .segname = makeStaticString("__TEXT"), .addr = 4294983680, .size = 448, .offset = 16384, .@"align" = 2, - .reloff = 0, - .nreloc = 0, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved1 = 0, - .reserved2 = 0, - .reserved3 = 0, - }); + })); defer cmd.deinit(gpa); try testRead(gpa, in_buffer, LoadCommand{ .Segment = cmd }); From d0dc622638716583eb4d9de78dfc87bef7969bc0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 1 Sep 2021 00:01:57 +0200 Subject: [PATCH 38/78] macho: do not reset section's size after allocating atom --- src/link/MachO.zig | 12 ------------ src/link/MachO/commands.zig | 8 ++++---- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 38446db5a6..b320202fe1 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1950,8 +1950,6 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 break :blk end_addr; }; assert(needed_size <= end_addr); // TODO must expand the section - sect.size = needed_size; - self.load_commands_dirty = true; } const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); sym.n_value = vaddr; @@ -4599,17 +4597,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, if (expand_text_section) { const needed_size = (vaddr + new_block_size) - text_section.addr; assert(needed_size <= text_segment.inner.filesize); // TODO must move the entire text section. - _ = try self.blocks.put(self.base.allocator, match, text_block); - text_section.size = needed_size; - self.load_commands_dirty = true; // TODO Make more granular. 
- - if (self.d_sym) |*ds| { - const debug_text_seg = &ds.load_commands.items[ds.text_segment_cmd_index.?].Segment; - const debug_text_sect = &debug_text_seg.sections.items[ds.text_section_index.?]; - debug_text_sect.size = needed_size; - ds.load_commands_dirty = true; - } } text_block.size = new_block_size; diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index 25154e2d5b..ff7c5e841b 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -270,11 +270,11 @@ pub const SegmentCommand = struct { } pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u32, start: ?u64) u64 { - var st: u64 = if (start) |v| v else self.inner.fileoff; - while (self.detectAllocCollision(st, object_size)) |item_end| { - st = mem.alignForwardGeneric(u64, item_end, min_alignment); + var offset: u64 = if (start) |v| v else self.inner.fileoff; + while (self.detectAllocCollision(offset, object_size)) |item_end| { + offset = mem.alignForwardGeneric(u64, item_end, min_alignment); } - return st; + return offset; } fn eql(self: SegmentCommand, other: SegmentCommand) bool { From 7a99cd069afed01b8573274c20f685e61d0950c8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 1 Sep 2021 11:55:32 +0200 Subject: [PATCH 39/78] macho: clean up allocating atom logic Instead of checking for stage1 at every callsite, move the logic inside `allocateAtom`. This is fine since this logic will disappear anyhow once I add expanding and shifting segments and sections. --- src/link/MachO.zig | 188 ++++++++++++++--------------------- src/link/MachO/Object.zig | 19 +--- src/link/MachO/TextBlock.zig | 46 +++------ 3 files changed, 90 insertions(+), 163 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b320202fe1..e7ca3f9d6a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -760,27 +760,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.addCodeSignatureLC(); if (use_stage1) { - { - const atom = try self.createDyldPrivateAtom(); - try self.allocateAtomStage1(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }); - } - { - const atom = try self.createStubHelperPreambleAtom(); - try self.allocateAtomStage1(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); - // TODO this is just a temp - // We already prealloc stub helper size in populateMissingMetadata(), but - // perhaps it's not needed after all? 
- const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sect = &seg.sections.items[self.stub_helper_section_index.?]; - sect.size -= atom.size; - } - try self.parseTextBlocks(); try self.allocateTextSegment(); try self.allocateDataConstSegment(); @@ -1919,55 +1898,70 @@ pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { const seg = &self.load_commands.items[match.seg].Segment; const sect = &seg.sections.items[match.sect]; - const sym = &self.locals.items[atom.local_sym_index]; + const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - var atom_placement: ?*TextBlock = null; + const vaddr = outer: { + if (!use_stage1) { + const sym = &self.locals.items[atom.local_sym_index]; - // TODO converge with `allocateTextBlock` and handle free list - const vaddr = if (self.blocks.get(match)) |last| blk: { - const last_atom_sym = self.locals.items[last.local_sym_index]; - const ideal_capacity = padToIdeal(last.size); - const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity; - const last_atom_alignment = try math.powi(u32, 2, atom.alignment); - const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment); - atom_placement = last; - break :blk new_start_vaddr; - } else sect.addr; + var atom_placement: ?*TextBlock = null; - log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); + // TODO converge with `allocateTextBlock` and handle free list + const vaddr = if (self.blocks.get(match)) |last| blk: { + const last_atom_sym = self.locals.items[last.local_sym_index]; + const ideal_capacity = padToIdeal(last.size); + const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity; + const last_atom_alignment = try math.powi(u32, 2, atom.alignment); + const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment); + atom_placement = last; + break :blk new_start_vaddr; + } else sect.addr; - const expand_section = atom_placement == null or atom_placement.?.next == null; - if (expand_section) { - const needed_size = (vaddr + atom.size) - sect.addr; - const end_addr = blk: { - const next_ordinal = self.section_ordinals.getIndex(match).?; // Ordinals are +1 to begin with. - const end_addr = if (self.section_ordinals.keys().len > next_ordinal) inner: { - const next_match = self.section_ordinals.keys()[next_ordinal]; - const next_seg = self.load_commands.items[next_match.seg].Segment; - const next_sect = next_seg.sections.items[next_match.sect]; - break :inner next_sect.addr; - } else seg.inner.filesize; - break :blk end_addr; - }; - assert(needed_size <= end_addr); // TODO must expand the section - } - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - sym.n_value = vaddr; - sym.n_sect = n_sect; + log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - // Update each alias (if any) - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = vaddr; - alias_sym.n_sect = n_sect; - } + const expand_section = atom_placement == null or atom_placement.?.next == null; + if (expand_section) { + const needed_size = (vaddr + atom.size) - sect.addr; + const end_addr = blk: { + const next_ordinal = self.section_ordinals.getIndex(match).?; // Ordinals are +1 to begin with. 
+ const end_addr = if (self.section_ordinals.keys().len > next_ordinal) inner: { + const next_match = self.section_ordinals.keys()[next_ordinal]; + const next_seg = self.load_commands.items[next_match.seg].Segment; + const next_sect = next_seg.sections.items[next_match.sect]; + break :inner next_sect.addr; + } else seg.inner.filesize; + break :blk end_addr; + }; + assert(needed_size <= end_addr); // TODO must expand the section + } + const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + sym.n_value = vaddr; + sym.n_sect = n_sect; - // Update each symbol contained within the TextBlock - for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = vaddr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } + // Update each alias (if any) + for (atom.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = vaddr; + alias_sym.n_sect = n_sect; + } + + // Update each symbol contained within the TextBlock + for (atom.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = vaddr + sym_at_off.offset; + contained_sym.n_sect = n_sect; + } + + break :outer vaddr; + } else { + const new_alignment = math.max(sect.@"align", atom.alignment); + const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); + const new_size = mem.alignForwardGeneric(u64, sect.size, new_alignment_pow_2) + atom.size; + sect.size = new_size; + sect.@"align" = new_alignment; + break :outer 0; + } + }; if (self.blocks.getPtr(match)) |last| { last.*.next = atom; @@ -2017,27 +2011,6 @@ fn allocateGlobalSymbols(self: *MachO) !void { } } -pub fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { - // Update target section's metadata - // TODO should we update segment's size here too? - // How does it tie with incremental space allocs? 
- const tseg = &self.load_commands.items[match.seg].Segment; - const tsect = &tseg.sections.items[match.sect]; - const new_alignment = math.max(tsect.@"align", atom.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, tsect.size, new_alignment_pow_2) + atom.size; - tsect.size = new_size; - tsect.@"align" = new_alignment; - - if (self.blocks.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try self.blocks.putNoClobber(self.base.allocator, match, atom); - } -} - fn writeAtoms(self: *MachO) !void { var it = self.blocks.iterator(); while (it.next()) |entry| { @@ -2607,8 +2580,6 @@ fn resolveSymbolsInObject( } fn resolveSymbols(self: *MachO) !void { - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - var tentatives = std.AutoArrayHashMap(u32, void).init(self.base.allocator); defer tentatives.deinit(); @@ -2668,27 +2639,23 @@ fn resolveSymbols(self: *MachO) !void { resolv.local_sym_index = local_sym_index; const atom = try self.createEmptyAtom(local_sym_index, size, alignment); - if (use_stage1) { - try self.allocateAtomStage1(atom, match); - } + _ = try self.allocateAtom(atom, match); } try self.resolveDyldStubBinder(); - if (!use_stage1) { - { - const atom = try self.createDyldPrivateAtom(); - _ = try self.allocateAtom(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }); - } - { - const atom = try self.createStubHelperPreambleAtom(); - _ = try self.allocateAtom(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); - } + { + const atom = try self.createDyldPrivateAtom(); + _ = try self.allocateAtom(atom, .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }); + } + { + const atom = try self.createStubHelperPreambleAtom(); + _ = try self.allocateAtom(atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }); } // Third pass, resolve symbols in dynamic libraries. @@ -2800,9 +2767,7 @@ fn resolveSymbols(self: *MachO) !void { // TODO perhaps we should special-case special symbols? Create a separate // linked list of atoms? const atom = try self.createEmptyAtom(local_sym_index, 0, 0); - if (use_stage1) { - try self.allocateAtomStage1(atom, match); - } + _ = try self.allocateAtom(atom, match); } for (self.unresolved.keys()) |index| { @@ -2869,12 +2834,7 @@ fn resolveDyldStubBinder(self: *MachO) !void { .seg = self.data_const_segment_cmd_index.?, .sect = self.got_section_index.?, }; - // TODO remove once we can incrementally update in stage1 too. 
- if (!(build_options.is_stage1 and self.base.options.use_stage1)) { - _ = try self.allocateAtom(atom, match); - } else { - try self.allocateAtomStage1(atom, match); - } + _ = try self.allocateAtom(atom, match); self.binding_info_dirty = true; } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index ec9a4901fe..72dbb05de9 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -453,7 +453,6 @@ pub fn parseTextBlocks( object_id: u16, macho_file: *MachO, ) !void { - const use_stage1 = build_options.is_stage1 and macho_file.base.options.use_stage1; const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.debug("analysing {s}", .{self.name}); @@ -590,11 +589,7 @@ pub fn parseTextBlocks( } } - if (use_stage1) { - try macho_file.allocateAtomStage1(block, match); - } else { - _ = try macho_file.allocateAtom(block, match); - } + _ = try macho_file.allocateAtom(block, match); try self.text_blocks.append(allocator, block); } @@ -641,11 +636,7 @@ pub fn parseTextBlocks( } } - if (use_stage1) { - try macho_file.allocateAtomStage1(block, match); - } else { - _ = try macho_file.allocateAtom(block, match); - } + _ = try macho_file.allocateAtom(block, match); try self.text_blocks.append(allocator, block); } @@ -734,11 +725,7 @@ pub fn parseTextBlocks( }); } - if (use_stage1) { - try macho_file.allocateAtomStage1(block, match); - } else { - _ = try macho_file.allocateAtom(block, match); - } + _ = try macho_file.allocateAtom(block, match); try self.text_blocks.append(allocator, block); } } diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index d753fe29f4..160ba5cd8c 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -843,11 +843,7 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R .seg = context.macho_file.data_const_segment_cmd_index.?, .sect = context.macho_file.got_section_index.?, }; - if (!(build_options.is_stage1 and context.macho_file.base.options.use_stage1)) { - _ = try context.macho_file.allocateAtom(atom, match); - } else { - try context.macho_file.allocateAtomStage1(atom, match); - } + _ = try context.macho_file.allocateAtom(atom, match); } else if (parsed_rel.payload == .unsigned) { switch (parsed_rel.where) { .undef => { @@ -910,34 +906,18 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R ); const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index); try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom); - - if (build_options.is_stage1 and context.macho_file.base.options.use_stage1) { - try context.macho_file.allocateAtomStage1(stub_helper_atom, .{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stub_helper_section_index.?, - }); - try context.macho_file.allocateAtomStage1(laptr_atom, .{ - .seg = context.macho_file.data_segment_cmd_index.?, - .sect = context.macho_file.la_symbol_ptr_section_index.?, - }); - try context.macho_file.allocateAtomStage1(stub_atom, .{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stubs_section_index.?, - }); - } else { - _ = try context.macho_file.allocateAtom(stub_helper_atom, .{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stub_helper_section_index.?, - }); - _ = try context.macho_file.allocateAtom(laptr_atom, .{ - .seg = context.macho_file.data_segment_cmd_index.?, - .sect = 
context.macho_file.la_symbol_ptr_section_index.?, - }); - _ = try context.macho_file.allocateAtom(stub_atom, .{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stubs_section_index.?, - }); - } + _ = try context.macho_file.allocateAtom(stub_helper_atom, .{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + }); + _ = try context.macho_file.allocateAtom(laptr_atom, .{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + }); + _ = try context.macho_file.allocateAtom(stub_atom, .{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + }); } } } From 4eff0f4ea17da9ca8819d34fbf855596c3398497 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 1 Sep 2021 16:49:16 +0200 Subject: [PATCH 40/78] macho: fix condition for checking available size for an atom --- src/link/MachO.zig | 37 +++++++++++++++++++------------------ src/link/MachO/commands.zig | 6 ++---- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e7ca3f9d6a..0892053a4f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -281,6 +281,10 @@ const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; const minimum_text_block_size = 64; pub const min_text_capacity = padToIdeal(minimum_text_block_size); +/// Virtual memory offset corresponds to the size of __PAGEZERO segment and start of +/// __TEXT segment. +const pagezero_vmsize: u64 = 0x100000000; + pub const Export = struct { sym_index: ?u32 = null, }; @@ -1903,13 +1907,19 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const vaddr = outer: { if (!use_stage1) { const sym = &self.locals.items[atom.local_sym_index]; + const needs_padding = blk: { + // TODO is __text the only section that benefits from padding? + if (match.seg == self.text_segment_cmd_index.? and + match.sect == self.text_section_index.?) break :blk true; + break :blk false; + }; var atom_placement: ?*TextBlock = null; // TODO converge with `allocateTextBlock` and handle free list const vaddr = if (self.blocks.get(match)) |last| blk: { const last_atom_sym = self.locals.items[last.local_sym_index]; - const ideal_capacity = padToIdeal(last.size); + const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity; const last_atom_alignment = try math.powi(u32, 2, atom.alignment); const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment); @@ -1921,18 +1931,9 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - const needed_size = (vaddr + atom.size) - sect.addr; - const end_addr = blk: { - const next_ordinal = self.section_ordinals.getIndex(match).?; // Ordinals are +1 to begin with. 
- const end_addr = if (self.section_ordinals.keys().len > next_ordinal) inner: { - const next_match = self.section_ordinals.keys()[next_ordinal]; - const next_seg = self.load_commands.items[next_match.seg].Segment; - const next_sect = next_seg.sections.items[next_match.sect]; - break :inner next_sect.addr; - } else seg.inner.filesize; - break :blk end_addr; - }; - assert(needed_size <= end_addr); // TODO must expand the section + const max_size = seg.allocatedSize(vaddr - pagezero_vmsize); + log.debug(" (atom size 0x{x}, max available size 0x{x})", .{ atom.size, max_size }); + assert(atom.size <= max_size); // TODO must expand the section } const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); sym.n_value = vaddr; @@ -3912,7 +3913,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { .Segment = .{ .inner = .{ .segname = makeStaticString("__PAGEZERO"), - .vmsize = 0x100000000, // size always set to 4GB + .vmsize = pagezero_vmsize, }, }, }); @@ -3932,7 +3933,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { .Segment = .{ .inner = .{ .segname = makeStaticString("__TEXT"), - .vmaddr = 0x100000000, // always starts at 4GB + .vmaddr = pagezero_vmsize, .vmsize = needed_size, .filesize = needed_size, .maxprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE, @@ -4452,8 +4453,6 @@ fn allocateSection( const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) self.header_pad else null; const off = seg.findFreeSpace(size, alignment_pow_2, padding); - assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand - log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ commands.segmentName(sect), commands.sectionName(sect), @@ -4556,7 +4555,9 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const expand_text_section = block_placement == null or block_placement.?.next == null; if (expand_text_section) { const needed_size = (vaddr + new_block_size) - text_section.addr; - assert(needed_size <= text_segment.inner.filesize); // TODO must move the entire text section. 
+ const max_size = text_segment.allocatedSize(vaddr - pagezero_vmsize); + log.debug(" (atom needed size 0x{x}, max available size 0x{x})", .{ needed_size, max_size }); + assert(needed_size <= max_size); // TODO must expand the section _ = try self.blocks.put(self.base.allocator, match, text_block); } text_block.size = new_block_size; diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index ff7c5e841b..7bb1b12c32 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -246,10 +246,8 @@ pub const SegmentCommand = struct { } pub fn allocatedSize(self: SegmentCommand, start: u64) u64 { - assert(start > 0); - if (start == self.inner.fileoff) - return 0; - var min_pos: u64 = std.math.maxInt(u64); + assert(start >= self.inner.fileoff); + var min_pos: u64 = self.inner.fileoff + self.inner.filesize; for (self.sections.items) |section| { if (section.offset <= start) continue; if (section.offset < min_pos) min_pos = section.offset; From 17066cc12e6fc3fb5349032dbcfbfd3552000a8d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 1 Sep 2021 23:26:24 +0200 Subject: [PATCH 41/78] macho: minor refactor of const namings --- src/link/MachO.zig | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 0892053a4f..705e4d2ee6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -271,9 +271,7 @@ pub const GotIndirectionKey = struct { const ideal_factor = 2; /// Default path to dyld -/// TODO instead of hardcoding it, we should probably look through some env vars and search paths -/// instead but this will do for now. -const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld"; +const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; /// In order for a slice of bytes to be considered eligible to keep metadata pointing at /// it as a possible place to put new symbols, it must have enough room for this many bytes @@ -4315,7 +4313,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); const cmdsize = @intCast(u32, mem.alignForwardGeneric( u64, - @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH), + @sizeOf(macho.dylinker_command) + mem.lenZ(default_dyld_path), @sizeOf(u64), )); var dylinker_cmd = commands.emptyGenericCommandWithData(macho.dylinker_command{ @@ -4325,7 +4323,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { }); dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); + mem.copy(u8, dylinker_cmd.data, mem.spanZ(default_dyld_path)); try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); self.load_commands_dirty = true; } From 9dbad2d1888aae19f0411cc6de27171abc4e96f5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 1 Sep 2021 23:44:01 +0200 Subject: [PATCH 42/78] macho: fix calculating file offset for atom when allocating --- src/link/MachO.zig | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 705e4d2ee6..f4c3e17cad 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1929,7 +1929,19 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - const max_size = seg.allocatedSize(vaddr - pagezero_vmsize); + const sect_offset: u64 = 
blk: { + if (self.data_segment_cmd_index.? == match.seg) { + if (self.bss_section_index) |idx| { + if (idx == match.sect) break :blk self.bss_file_offset.?; + } + if (self.tlv_bss_section_index) |idx| { + if (idx == match.sect) break :blk self.tlv_bss_file_offset.?; + } + } + break :blk sect.offset; + }; + const file_offset = sect_offset + vaddr - sect.addr; + const max_size = seg.allocatedSize(file_offset); log.debug(" (atom size 0x{x}, max available size 0x{x})", .{ atom.size, max_size }); assert(atom.size <= max_size); // TODO must expand the section } From 4741c04254790fedcebdce7d13b27c6ba31ac412 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 2 Sep 2021 00:44:33 +0200 Subject: [PATCH 43/78] macho: better spec for sections which don't require padding --- src/link/MachO.zig | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f4c3e17cad..4d2db62002 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1905,11 +1905,21 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const vaddr = outer: { if (!use_stage1) { const sym = &self.locals.items[atom.local_sym_index]; - const needs_padding = blk: { - // TODO is __text the only section that benefits from padding? - if (match.seg == self.text_segment_cmd_index.? and - match.sect == self.text_section_index.?) break :blk true; - break :blk false; + // Padding is not required for pointer-type sections and any synthetic sections such as + // stubs or stub_helper. + // TODO audit this. + const needs_padding = switch (commands.sectionType(sect.*)) { + macho.S_SYMBOL_STUBS, + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + macho.S_LITERAL_POINTERS, + macho.S_THREAD_LOCAL_VARIABLES, + => false, + else => blk: { + if (match.seg == self.text_segment_cmd_index.? and + match.sect == self.stub_helper_section_index.?) break :blk false; + break :blk true; + }, }; var atom_placement: ?*TextBlock = null; From 5af13f35f98c75d063cadf5e5607d066cbdb3e0e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 2 Sep 2021 18:19:07 +0200 Subject: [PATCH 44/78] macho: implement basic section movement and reallocation --- src/link/MachO.zig | 93 ++++++++++++++++++++++++++++++++++--- src/link/MachO/commands.zig | 2 +- 2 files changed, 87 insertions(+), 8 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4d2db62002..3f6ceed38a 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1925,7 +1925,7 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 var atom_placement: ?*TextBlock = null; // TODO converge with `allocateTextBlock` and handle free list - const vaddr = if (self.blocks.get(match)) |last| blk: { + var vaddr = if (self.blocks.get(match)) |last| blk: { const last_atom_sym = self.locals.items[last.local_sym_index]; const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity; @@ -1939,6 +1939,7 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { + const needed_size = (vaddr + atom.size) - sect.addr; const sect_offset: u64 = blk: { if (self.data_segment_cmd_index.? 
== match.seg) { if (self.bss_section_index) |idx| { @@ -1952,8 +1953,35 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 }; const file_offset = sect_offset + vaddr - sect.addr; const max_size = seg.allocatedSize(file_offset); - log.debug(" (atom size 0x{x}, max available size 0x{x})", .{ atom.size, max_size }); - assert(atom.size <= max_size); // TODO must expand the section + log.debug(" (section {s},{s} needed size 0x{x}, max available size 0x{x})", .{ + commands.segmentName(sect.*), + commands.sectionName(sect.*), + needed_size, + max_size, + }); + + if (needed_size > max_size) { + const old_base_addr = sect.addr; + sect.size = 0; + const padding: ?u64 = if (match.seg == self.text_segment_cmd_index.?) self.header_pad else null; + const new_offset = @intCast(u32, seg.findFreeSpace(needed_size, atom.alignment, padding)); + sect.offset = new_offset; + sect.addr = seg.inner.vmaddr + sect.offset - seg.inner.fileoff; + log.debug(" (found new {s},{s} free space from 0x{x} to 0x{x})", .{ + commands.segmentName(sect.*), + commands.sectionName(sect.*), + new_offset, + new_offset + needed_size, + }); + try self.allocateLocalSymbols(match, old_base_addr); + vaddr = @intCast( + u64, + @intCast(i64, vaddr) + @intCast(i64, sect.addr) - @intCast(i64, old_base_addr), + ); + } + + sect.size = needed_size; + self.load_commands_dirty = true; } const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); sym.n_value = vaddr; @@ -2017,6 +2045,32 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { try self.writeLocalSymbol(atom.local_sym_index); } +fn allocateLocalSymbols(self: *MachO, match: MatchingSection, old_base_addr: u64) !void { + var atom = self.blocks.get(match) orelse return; + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const offset = @intCast(i64, sect.addr) - @intCast(i64, old_base_addr); + + while (true) { + const atom_sym = &self.locals.items[atom.local_sym_index]; + atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); + + for (atom.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = @intCast(u64, @intCast(i64, alias_sym.n_value) + offset); + } + + for (atom.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } +} + fn allocateGlobalSymbols(self: *MachO) !void { // TODO should we do this in `allocateAtom` (or similar)? Then, we would need to // store the link atom -> globals somewhere. 
@@ -3943,8 +3997,9 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.text_segment_cmd_index == null) { self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); const program_code_size_hint = self.base.options.program_code_size_hint; + // const program_code_size_hint = 10; const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; - const ideal_size = self.header_pad + program_code_size_hint + 3 * got_size_hint; + const ideal_size = self.header_pad + program_code_size_hint + got_size_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); @@ -3971,6 +4026,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { else => unreachable, // unhandled architecture type }; const needed_size = self.base.options.program_code_size_hint; + // const needed_size = 10; self.text_section_index = try self.allocateSection( self.text_segment_cmd_index.?, "__text", @@ -4521,7 +4577,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, // First we look for an appropriately sized free list node. // The list is unordered. We'll just take the first thing that works. - const vaddr = blk: { + var vaddr = blk: { var i: usize = 0; while (i < text_block_free_list.items.len) { const big_block = text_block_free_list.items[i]; @@ -4576,8 +4632,31 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, if (expand_text_section) { const needed_size = (vaddr + new_block_size) - text_section.addr; const max_size = text_segment.allocatedSize(vaddr - pagezero_vmsize); - log.debug(" (atom needed size 0x{x}, max available size 0x{x})", .{ needed_size, max_size }); - assert(needed_size <= max_size); // TODO must expand the section + log.debug(" (section __TEXT,__text needed size 0x{x}, max available size 0x{x})", .{ needed_size, max_size }); + + if (needed_size > max_size) { + const old_base_addr = text_section.addr; + text_section.size = 0; + const new_offset = @intCast(u32, text_segment.findFreeSpace(needed_size, alignment, self.header_pad)); + text_section.offset = new_offset; + text_section.addr = text_segment.inner.vmaddr + text_section.offset - text_segment.inner.fileoff; + log.debug(" (found new __TEXT,__text free space from 0x{x} to 0x{x})", .{ + new_offset, + new_offset + needed_size, + }); + try self.allocateLocalSymbols(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }, old_base_addr); + vaddr = @intCast( + u64, + @intCast(i64, vaddr) + @intCast(i64, text_section.addr) - @intCast(i64, old_base_addr), + ); + } + + text_section.size = needed_size; + self.load_commands_dirty = true; + _ = try self.blocks.put(self.base.allocator, match, text_block); } text_block.size = new_block_size; diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index 7bb1b12c32..77d2b942a8 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -267,7 +267,7 @@ pub const SegmentCommand = struct { return null; } - pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u32, start: ?u64) u64 { + pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u64, start: ?u64) u64 { var offset: u64 = if (start) |v| v else self.inner.fileoff; while (self.detectAllocCollision(offset, object_size)) |item_end| { offset = mem.alignForwardGeneric(u64, item_end, min_alignment); From 4b07da7090542292f77a11bcfe1ef79e8baf1e97 Mon Sep 
17 00:00:00 2001 From: Jakub Konka Date: Fri, 3 Sep 2021 00:21:45 +0200 Subject: [PATCH 45/78] macho: remove all Zld codepaths --- src/link/MachO.zig | 764 ++++----------------------------------------- 1 file changed, 68 insertions(+), 696 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 3f6ceed38a..c8058f1525 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -761,43 +761,34 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.addDataInCodeLC(); try self.addCodeSignatureLC(); - if (use_stage1) { - try self.parseTextBlocks(); - try self.allocateTextSegment(); - try self.allocateDataConstSegment(); - try self.allocateDataSegment(); - self.allocateLinkeditSegment(); - try self.allocateTextBlocks(); - try self.flushZld(); - } else { - try self.parseTextBlocks(); - try self.allocateGlobalSymbols(); - { - log.debug("locals:", .{}); - for (self.locals.items) |sym| { - log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); - } - log.debug("globals:", .{}); - for (self.globals.items) |sym| { - log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); - } - log.debug("undefs:", .{}); - for (self.undefs.items) |sym| { - log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); - } - log.debug("unresolved:", .{}); - for (self.unresolved.keys()) |key| { - log.debug(" {d} => {s}", .{ key, self.unresolved.get(key).? }); - } - log.debug("resolved:", .{}); - var it = self.symbol_resolver.iterator(); - while (it.next()) |entry| { - log.debug(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); - } + try self.parseTextBlocks(); + try self.allocateGlobalSymbols(); + { + log.debug("locals:", .{}); + for (self.locals.items) |sym| { + log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + log.debug("globals:", .{}); + for (self.globals.items) |sym| { + log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + log.debug("undefs:", .{}); + for (self.undefs.items) |sym| { + log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); + } + log.debug("unresolved:", .{}); + for (self.unresolved.keys()) |key| { + log.debug(" {d} => {s}", .{ key, self.unresolved.get(key).? }); + } + log.debug("resolved:", .{}); + var it = self.symbol_resolver.iterator(); + while (it.next()) |entry| { + log.debug(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); } - try self.writeAtoms(); - try self.flushModule(comp); } + try self.writeAtoms(); + try self.writeDices(); + try self.flushModule(comp); } if (!self.base.options.disable_lld_caching) { @@ -1638,248 +1629,6 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio return res; } -fn allocateTextSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_vmaddr = self.load_commands.items[self.pagezero_segment_cmd_index.?].Segment.inner.vmsize; - seg.inner.fileoff = 0; - seg.inner.vmaddr = base_vmaddr; - - var sizeofcmds: u64 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } - - try self.allocateSegment(self.text_segment_cmd_index.?, @sizeOf(macho.mach_header_64) + sizeofcmds); - - // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
- var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); - min_alignment = math.max(min_alignment, alignment); - } - - assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; - const shift: u32 = blk: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; - const factor = @divTrunc(diff, min_alignment); - break :blk @intCast(u32, factor * min_alignment); - }; - - if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; - } - } -} - -fn allocateDataConstSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - seg.inner.fileoff = text_seg.inner.fileoff + text_seg.inner.filesize; - seg.inner.vmaddr = text_seg.inner.vmaddr + text_seg.inner.vmsize; - try self.allocateSegment(self.data_const_segment_cmd_index.?, 0); -} - -fn allocateDataSegment(self: *MachO) !void { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_const_seg.inner.fileoff + data_const_seg.inner.filesize; - seg.inner.vmaddr = data_const_seg.inner.vmaddr + data_const_seg.inner.vmsize; - try self.allocateSegment(self.data_segment_cmd_index.?, 0); -} - -fn allocateLinkeditSegment(self: *MachO) void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const data_seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - seg.inner.fileoff = data_seg.inner.fileoff + data_seg.inner.filesize; - seg.inner.vmaddr = data_seg.inner.vmaddr + data_seg.inner.vmsize; -} - -fn allocateSegment(self: *MachO, index: u16, offset: u64) !void { - const seg = &self.load_commands.items[index].Segment; - - // Allocate the sections according to their alignment at the beginning of the segment. - var start: u64 = offset; - for (seg.sections.items) |*sect, sect_id| { - const alignment = try math.powi(u32, 2, sect.@"align"); - const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - const end_aligned = mem.alignForwardGeneric(u64, start_aligned + sect.size, alignment); - const file_offset = @intCast(u32, seg.inner.fileoff + start_aligned); - - blk: { - if (index == self.data_segment_cmd_index.?) 
{ - if (self.bss_section_index) |idx| { - if (sect_id == idx) { - self.bss_file_offset = file_offset; - break :blk; - } - } - if (self.tlv_bss_section_index) |idx| { - if (sect_id == idx) { - self.tlv_bss_file_offset = file_offset; - break :blk; - } - } - } - sect.offset = @intCast(u32, seg.inner.fileoff + start_aligned); - } - - sect.addr = seg.inner.vmaddr + start_aligned; - start = end_aligned; - } - - const seg_size_aligned = mem.alignForwardGeneric(u64, start, self.page_size); - seg.inner.filesize = seg_size_aligned; - seg.inner.vmsize = seg_size_aligned; -} - -fn allocateTextBlocks(self: *MachO) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - // Find the first block - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - - var base_addr: u64 = sect.addr; - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - - log.debug(" within section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); - log.debug(" {}", .{sect}); - - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - base_addr = mem.alignForwardGeneric(u64, base_addr, block_alignment); - - const sym = &self.locals.items[block.local_sym_index]; - sym.n_value = base_addr; - sym.n_sect = n_sect; - - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - base_addr, - base_addr + block.size, - block.size, - block.alignment, - }); - - // Update each alias (if any) - for (block.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_addr; - alias_sym.n_sect = n_sect; - } - - // Update each symbol contained within the TextBlock - for (block.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = base_addr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - base_addr += block.size; - - if (block.next) |next| { - block = next; - } else break; - } - } - - // Update globals - { - var sym_it = self.symbol_resolver.valueIterator(); - while (sym_it.next()) |resolv| { - if (resolv.where != .global) continue; - - assert(resolv.local_sym_index != 0); - const local_sym = self.locals.items[resolv.local_sym_index]; - const sym = &self.globals.items[resolv.where_index]; - sym.n_value = local_sym.n_value; - sym.n_sect = local_sym.n_sect; - } - } -} - -fn writeTextBlocks(self: *MachO) !void { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - while (block.prev) |prev| { - block = prev; - } - - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const sect_type = commands.sectionType(sect); - - log.debug(" for section {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); - log.debug(" {}", .{sect}); - - var code = try self.base.allocator.alloc(u8, sect.size); - defer self.base.allocator.free(code); - - const file_offset: u64 = blk: { - if (self.data_segment_cmd_index.? 
== match.seg) { - if (self.bss_section_index) |idx| { - if (idx == match.sect) break :blk self.bss_file_offset.?; - } - if (self.tlv_bss_section_index) |idx| { - if (idx == match.sect) break :blk self.tlv_bss_file_offset.?; - } - } - break :blk sect.offset; - }; - - if (sect_type == macho.S_ZEROFILL or sect_type == macho.S_THREAD_LOCAL_ZEROFILL) { - mem.set(u8, code, 0); - } else { - var base_off: u64 = 0; - - while (true) { - const block_alignment = try math.powi(u32, 2, block.alignment); - const aligned_base_off = mem.alignForwardGeneric(u64, base_off, block_alignment); - - const sym = self.locals.items[block.local_sym_index]; - log.debug(" {s}: start=0x{x}, end=0x{x}, size={}, align={}", .{ - self.getString(sym.n_strx), - aligned_base_off, - aligned_base_off + block.size, - block.size, - block.alignment, - }); - - try block.resolveRelocs(self); - mem.copy(u8, code[aligned_base_off..][0..block.size], block.code.items); - - // TODO NOP for machine code instead of just zeroing out - const padding_len = aligned_base_off - base_off; - mem.set(u8, code[base_off..][0..padding_len], 0); - - base_off = aligned_base_off + block.size; - - if (block.next) |next| { - block = next; - } else break; - } - - mem.set(u8, code[base_off..], 0); - } - - try self.base.file.?.pwriteAll(code, file_offset); - } -} - pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*TextBlock { const code = try self.base.allocator.alloc(u8, size); defer self.base.allocator.free(code); @@ -2920,17 +2669,23 @@ fn parseTextBlocks(self: *MachO) !void { } fn addDataInCodeLC(self: *MachO) !void { - if (self.data_in_code_cmd_index == null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } + if (self.data_in_code_cmd_index != null) return; + self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; + const needed_size = 10 * @sizeOf(macho.data_in_code_entry); + const dataoff = self.findFreeSpaceLinkedit(needed_size, @alignOf(macho.data_in_code_entry), null); + log.debug("found data-in-code free space 0x{x} to 0x{x}", .{ dataoff, dataoff + needed_size }); + dice_cmd.dataoff = @intCast(u32, dataoff); + dice_cmd.datasize = needed_size; + self.load_commands_dirty = true; } fn addCodeSignatureLC(self: *MachO) !void { @@ -2982,42 +2737,6 @@ fn addLoadDylibLCs(self: *MachO) !void { } } -fn flushZld(self: *MachO) !void { - try self.writeTextBlocks(); - try self.setEntryPoint(); - try self.writeRebaseInfoTableZld(); - try self.writeBindInfoTableZld(); - try self.writeLazyBindInfoTableZld(); - try self.writeExportInfoZld(); - try self.writeDices(); - - { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - } - - try self.writeSymbolTable(); - try self.writeStringTableZld(); - - { - // Seal __LINKEDIT size - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - 
seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - } - - if (self.requires_adhoc_codesig) { - try self.writeCodeSignaturePadding(); - } - - try self.writeLoadCommands(); - try self.writeHeader(); - - if (self.requires_adhoc_codesig) { - try self.writeCodeSignature(); - } -} - fn setEntryPoint(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; @@ -3039,343 +2758,6 @@ fn setEntryPoint(self: *MachO) !void { self.load_commands_dirty = true; } -fn writeRebaseInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.rebases.items) |offset| { - try pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - const size = try bind.rebaseInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeRebaseInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); - dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @sizeOf(u64))); - seg.inner.filesize += dyld_info.rebase_size; - - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); -} - -fn writeBindInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.bindings.items) |binding| { - const bind_sym = self.undefs.items[binding.local_sym_index]; - try pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - }); - } - - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - const size = try bind.bindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.bind_size; - - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); -} - -fn writeLazyBindInfoTableZld(self: *MachO) !void { - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - if (self.la_symbol_ptr_section_index) |sect| blk: { - var atom = self.blocks.get(.{ - .seg = self.data_segment_cmd_index.?, - .sect = sect, - }) orelse break :blk; - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - - while (true) { - const sym = self.locals.items[atom.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (atom.lazy_bindings.items) |binding| { - const bind_sym = self.undefs.items[binding.local_sym_index]; - try pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = self.data_segment_cmd_index.?, - .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - }); - } - if (atom.prev) |prev| { - atom = prev; - } else break; - } - } - - const size = try bind.lazyBindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeLazyBindInfo(pointers.items, stream.writer()); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.lazy_bind_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.lazy_bind_size; - - log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - try self.populateLazyBindOffsetsInStubHelper(buffer); -} - -fn writeExportInfoZld(self: *MachO) !void { - var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); - - const text_segment = 
self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. - log.debug("writing export trie", .{}); - - for (self.globals.items) |sym| { - const sym_name = self.getString(sym.n_strx); - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); - - try trie.put(self.base.allocator, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - - try trie.finalize(self.base.allocator); - - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - dyld_info.export_off = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64))); - seg.inner.filesize += dyld_info.export_size; - - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); -} - -fn writeSymbolTable(self: *MachO) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); - defer locals.deinit(); - try locals.appendSlice(self.locals.items); - - if (self.has_stabs) { - for (self.objects.items) |object| { - if (object.debug_info == null) continue; - - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_comp_dir.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_name.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime orelse 0, - }); - - for (object.text_blocks.items) |block| { - if (block.stab) |stab| { - const nlists = try stab.asNlists(block.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } else { - for (block.contained.items) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } - } - } - - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } - } - - const nlocals = locals.items.len; - const nexports = self.globals.items.len; - const nundefs = self.undefs.items.len; - - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); - - const exports_off = 
locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); - log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); - - const undefs_off = exports_off + exports_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); - - symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs); - seg.inner.filesize += locals_size + exports_size + undefs_size; - - // Update dynamic symbol table. - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym = @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nundefs); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_segment.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - - const nstubs = @intCast(u32, self.stubs_map.keys().len); - const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); - - dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; - - const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); - seg.inner.filesize += needed_size; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, - }); - - var buf = try self.base.allocator.alloc(u8, needed_size); - defer self.base.allocator.free(buf); - - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); - - stubs.reserved1 = 0; - for (self.stubs_map.keys()) |key| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key); - } - - got.reserved1 = nstubs; - for (self.got_entries_map.keys()) |key| { - switch (key.where) { - .undef => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); - }, - .local => { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); - }, - } - } - - la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs_map.keys()) |key| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key); - } - - try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); -} - pub fn deinit(self: *MachO) void { if (build_options.have_llvm) { if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator); @@ -3812,12 +3194,6 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 try ds.writeLocalSymbol(decl.link.macho.local_sym_index); } - // // Resolve relocations - // try decl.link.macho.resolveRelocs(self); - // // TODO this requires further investigation: should we dispose of resolved relocs, or keep them - // // so that we can reapply them when moving/growing sections? 
- // decl.link.macho.relocs.clearAndFree(self.base.allocator); - return symbol; } @@ -5003,9 +4379,8 @@ fn writeIndirectSymbolTable(self: *MachO) !void { fn writeDices(self: *MachO) !void { if (!self.has_dices) return; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; - const fileoff = seg.inner.fileoff + seg.inner.filesize; + const tracy = trace(@src()); + defer tracy.end(); var buf = std.ArrayList(u8).init(self.base.allocator); defer buf.deinit(); @@ -5043,15 +4418,26 @@ fn writeDices(self: *MachO) !void { } else break; } - const datasize = @intCast(u32, buf.items.len); + const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; + const allocated_size = self.allocatedSizeLinkedit(dice_cmd.dataoff); + const needed_size = @intCast(u32, buf.items.len); - dice_cmd.dataoff = @intCast(u32, fileoff); - dice_cmd.datasize = datasize; - seg.inner.filesize += datasize; + if (needed_size > allocated_size) { + dice_cmd.datasize = 0; + dice_cmd.dataoff = @intCast(u32, self.findFreeSpaceLinkedit( + needed_size, + @alignOf(macho.data_in_code_entry), + dice_cmd.dataoff, + )); + } + dice_cmd.datasize = needed_size; + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ + dice_cmd.dataoff, + dice_cmd.dataoff + dice_cmd.datasize, + }); - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ fileoff, fileoff + datasize }); - - try self.base.file.?.pwriteAll(buf.items, fileoff); + try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff); + self.load_commands_dirty = true; } fn writeCodeSignaturePadding(self: *MachO) !void { @@ -5130,7 +4516,7 @@ fn writeExportInfo(self: *MachO) !void { for (self.globals.items) |sym| { const sym_name = self.getString(sym.n_strx); - log.debug(" | putting '{s}' defined at 0x{x}", .{ sym_name, sym.n_value }); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); try trie.put(self.base.allocator, .{ .name = sym_name, @@ -5453,30 +4839,16 @@ fn writeStringTable(self: *MachO) !void { self.strtab_needs_relocation = false; } symtab.strsize = @intCast(u32, needed_size); - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + log.debug("writing string table from 0x{x} to 0x{x}", .{ + symtab.stroff, + symtab.stroff + symtab.strsize, + }); try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); self.load_commands_dirty = true; self.strtab_dirty = false; } -fn writeStringTableZld(self: *MachO) !void { - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); - seg.inner.filesize += symtab.strsize; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); - - if (symtab.strsize > self.strtab.items.len) { - // This is potentially the last section, so we need to pad it out. 
- try self.base.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); - } -} - fn updateLinkeditSegmentSizes(self: *MachO) !void { if (!self.load_commands_dirty) return; From a783f3a36952449f11b7e763490bdb4076bd62c0 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 3 Sep 2021 00:25:47 +0200 Subject: [PATCH 46/78] macho: remove obsolete mentions of stage1 --- src/link/MachO.zig | 229 +++++++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 123 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c8058f1525..e9b98d6ca9 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1649,118 +1649,104 @@ pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { const seg = &self.load_commands.items[match.seg].Segment; const sect = &seg.sections.items[match.sect]; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - const vaddr = outer: { - if (!use_stage1) { - const sym = &self.locals.items[atom.local_sym_index]; - // Padding is not required for pointer-type sections and any synthetic sections such as - // stubs or stub_helper. - // TODO audit this. - const needs_padding = switch (commands.sectionType(sect.*)) { - macho.S_SYMBOL_STUBS, - macho.S_NON_LAZY_SYMBOL_POINTERS, - macho.S_LAZY_SYMBOL_POINTERS, - macho.S_LITERAL_POINTERS, - macho.S_THREAD_LOCAL_VARIABLES, - => false, - else => blk: { - if (match.seg == self.text_segment_cmd_index.? and - match.sect == self.stub_helper_section_index.?) break :blk false; - break :blk true; - }, - }; - - var atom_placement: ?*TextBlock = null; - - // TODO converge with `allocateTextBlock` and handle free list - var vaddr = if (self.blocks.get(match)) |last| blk: { - const last_atom_sym = self.locals.items[last.local_sym_index]; - const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; - const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity; - const last_atom_alignment = try math.powi(u32, 2, atom.alignment); - const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment); - atom_placement = last; - break :blk new_start_vaddr; - } else sect.addr; - - log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - - const expand_section = atom_placement == null or atom_placement.?.next == null; - if (expand_section) { - const needed_size = (vaddr + atom.size) - sect.addr; - const sect_offset: u64 = blk: { - if (self.data_segment_cmd_index.? == match.seg) { - if (self.bss_section_index) |idx| { - if (idx == match.sect) break :blk self.bss_file_offset.?; - } - if (self.tlv_bss_section_index) |idx| { - if (idx == match.sect) break :blk self.tlv_bss_file_offset.?; - } - } - break :blk sect.offset; - }; - const file_offset = sect_offset + vaddr - sect.addr; - const max_size = seg.allocatedSize(file_offset); - log.debug(" (section {s},{s} needed size 0x{x}, max available size 0x{x})", .{ - commands.segmentName(sect.*), - commands.sectionName(sect.*), - needed_size, - max_size, - }); - - if (needed_size > max_size) { - const old_base_addr = sect.addr; - sect.size = 0; - const padding: ?u64 = if (match.seg == self.text_segment_cmd_index.?) 
self.header_pad else null; - const new_offset = @intCast(u32, seg.findFreeSpace(needed_size, atom.alignment, padding)); - sect.offset = new_offset; - sect.addr = seg.inner.vmaddr + sect.offset - seg.inner.fileoff; - log.debug(" (found new {s},{s} free space from 0x{x} to 0x{x})", .{ - commands.segmentName(sect.*), - commands.sectionName(sect.*), - new_offset, - new_offset + needed_size, - }); - try self.allocateLocalSymbols(match, old_base_addr); - vaddr = @intCast( - u64, - @intCast(i64, vaddr) + @intCast(i64, sect.addr) - @intCast(i64, old_base_addr), - ); - } - - sect.size = needed_size; - self.load_commands_dirty = true; - } - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - sym.n_value = vaddr; - sym.n_sect = n_sect; - - // Update each alias (if any) - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = vaddr; - alias_sym.n_sect = n_sect; - } - - // Update each symbol contained within the TextBlock - for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = vaddr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - break :outer vaddr; - } else { - const new_alignment = math.max(sect.@"align", atom.alignment); - const new_alignment_pow_2 = try math.powi(u32, 2, new_alignment); - const new_size = mem.alignForwardGeneric(u64, sect.size, new_alignment_pow_2) + atom.size; - sect.size = new_size; - sect.@"align" = new_alignment; - break :outer 0; - } + const sym = &self.locals.items[atom.local_sym_index]; + // Padding is not required for pointer-type sections and any synthetic sections such as + // stubs or stub_helper. + // TODO audit this. + const needs_padding = switch (commands.sectionType(sect.*)) { + macho.S_SYMBOL_STUBS, + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + macho.S_LITERAL_POINTERS, + macho.S_THREAD_LOCAL_VARIABLES, + => false, + else => blk: { + if (match.seg == self.text_segment_cmd_index.? and + match.sect == self.stub_helper_section_index.?) break :blk false; + break :blk true; + }, }; + var atom_placement: ?*TextBlock = null; + + // TODO converge with `allocateTextBlock` and handle free list + var vaddr = if (self.blocks.get(match)) |last| blk: { + const last_atom_sym = self.locals.items[last.local_sym_index]; + const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; + const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity; + const last_atom_alignment = try math.powi(u32, 2, atom.alignment); + const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment); + atom_placement = last; + break :blk new_start_vaddr; + } else sect.addr; + + log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); + + const expand_section = atom_placement == null or atom_placement.?.next == null; + if (expand_section) { + const needed_size = (vaddr + atom.size) - sect.addr; + const sect_offset: u64 = blk: { + if (self.data_segment_cmd_index.? 
== match.seg) { + if (self.bss_section_index) |idx| { + if (idx == match.sect) break :blk self.bss_file_offset.?; + } + if (self.tlv_bss_section_index) |idx| { + if (idx == match.sect) break :blk self.tlv_bss_file_offset.?; + } + } + break :blk sect.offset; + }; + const file_offset = sect_offset + vaddr - sect.addr; + const max_size = seg.allocatedSize(file_offset); + log.debug(" (section {s},{s} needed size 0x{x}, max available size 0x{x})", .{ + commands.segmentName(sect.*), + commands.sectionName(sect.*), + needed_size, + max_size, + }); + + if (needed_size > max_size) { + const old_base_addr = sect.addr; + sect.size = 0; + const padding: ?u64 = if (match.seg == self.text_segment_cmd_index.?) self.header_pad else null; + const new_offset = @intCast(u32, seg.findFreeSpace(needed_size, atom.alignment, padding)); + sect.offset = new_offset; + sect.addr = seg.inner.vmaddr + sect.offset - seg.inner.fileoff; + log.debug(" (found new {s},{s} free space from 0x{x} to 0x{x})", .{ + commands.segmentName(sect.*), + commands.sectionName(sect.*), + new_offset, + new_offset + needed_size, + }); + try self.allocateLocalSymbols(match, old_base_addr); + vaddr = @intCast( + u64, + @intCast(i64, vaddr) + @intCast(i64, sect.addr) - @intCast(i64, old_base_addr), + ); + } + + sect.size = needed_size; + self.load_commands_dirty = true; + } + const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + sym.n_value = vaddr; + sym.n_sect = n_sect; + + // Update each alias (if any) + for (atom.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = vaddr; + alias_sym.n_sect = n_sect; + } + + // Update each symbol contained within the TextBlock + for (atom.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = vaddr + sym_at_off.offset; + contained_sym.n_sect = n_sect; + } + if (self.blocks.getPtr(match)) |last| { last.*.next = atom; atom.prev = last.*; @@ -3899,22 +3885,19 @@ fn allocateSection( .reserved2 = opts.reserved2, }; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - if (!use_stage1) { - const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) self.header_pad else null; - const off = seg.findFreeSpace(size, alignment_pow_2, padding); + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) 
self.header_pad else null; + const off = seg.findFreeSpace(size, alignment_pow_2, padding); - log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ - commands.segmentName(sect), - commands.sectionName(sect), - off, - off + size, - }); + log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ + commands.segmentName(sect), + commands.sectionName(sect), + off, + off + size, + }); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; - sect.offset = @intCast(u32, off); - } + sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.offset = @intCast(u32, off); const index = @intCast(u16, seg.sections.items.len); try seg.sections.append(self.base.allocator, sect); From 7536a2f8cfdd66fc3b277514ec28ccbedfe83fc1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 3 Sep 2021 11:47:58 +0200 Subject: [PATCH 47/78] macho: minor fixes to allow the linker to output malformed stage1 --- src/link/MachO.zig | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e9b98d6ca9..dfd70c4492 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1710,7 +1710,8 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const old_base_addr = sect.addr; sect.size = 0; const padding: ?u64 = if (match.seg == self.text_segment_cmd_index.?) self.header_pad else null; - const new_offset = @intCast(u32, seg.findFreeSpace(needed_size, atom.alignment, padding)); + const atom_alignment = try math.powi(u64, 2, atom.alignment); + const new_offset = @intCast(u32, seg.findFreeSpace(needed_size, atom_alignment, padding)); sect.offset = new_offset; sect.addr = seg.inner.vmaddr + sect.offset - seg.inner.fileoff; log.debug(" (found new {s},{s} free space from 0x{x} to 0x{x})", .{ @@ -2741,6 +2742,7 @@ fn setEntryPoint(self: *MachO) !void { const ec = &self.load_commands.items[self.main_cmd_index.?].Main; ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); ec.stacksize = self.base.options.stack_size_override orelse 0; + self.entry_addr = sym.n_value; self.load_commands_dirty = true; } @@ -3234,15 +3236,7 @@ pub fn updateDeclExports( n_type |= macho.N_PEXT; n_desc |= macho.N_WEAK_DEF; }, - .Strong => { - // Check if the export is _main, and note if os. - // Otherwise, don't do anything since we already have all the flags - // set that we need for global (strong) linkage. - // n_type == N_SECT | N_EXT - if (mem.eql(u8, exp_name, "_main")) { - self.entry_addr = decl_sym.n_value; - } - }, + .Strong => {}, .Weak => { // Weak linkage is specified as part of n_desc field. // Symbol's n_type is like for a symbol with strong linkage. @@ -4244,7 +4238,14 @@ fn relocateSymbolTable(self: *MachO) !void { new_symoff + existing_size, }); - const amt = try self.base.file.?.copyRangeAll(symtab.symoff, self.base.file.?, new_symoff, existing_size); + // TODO copyRangeAll doesn't seem to extend the file beyond its allocated size + try self.base.file.?.pwriteAll(&[_]u8{0}, new_symoff + existing_size - 1); + const amt = try self.base.file.?.copyRangeAll( + symtab.symoff, + self.base.file.?, + new_symoff, + existing_size, + ); if (amt != existing_size) return error.InputOutput; symtab.symoff = @intCast(u32, new_symoff); self.strtab_needs_relocation = true; From 1d2199b71c06e97be407c2bd0b05c07c8bd6cd2c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 3 Sep 2021 13:41:28 +0200 Subject: [PATCH 48/78] macho: dirty export trie when adding globals Similarly, dirty rebase info when adding a GOT atom. 
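
As a rough illustration of the invalidation pattern (a hypothetical,
stripped-down type, not the actual MachO struct), a mutation marks the
on-disk table it affects as stale, and the next flush rewrites only
what is marked:

    /// Sketch only: dirty flags pair each mutation with the table it
    /// invalidates.
    const IncrementalLinker = struct {
        export_info_dirty: bool = false,
        rebase_info_dirty: bool = false,

        fn addGlobal(self: *IncrementalLinker) void {
            // A new global changes the exported symbol set, so the
            // export trie on disk is stale.
            self.export_info_dirty = true;
        }

        fn addGotAtom(self: *IncrementalLinker) void {
            // A new GOT slot holds a pointer dyld must slide at load
            // time, so the rebase info is stale.
            self.rebase_info_dirty = true;
        }

        fn flush(self: *IncrementalLinker) void {
            if (self.export_info_dirty) {
                // ... rewrite the export trie ...
                self.export_info_dirty = false;
            }
            if (self.rebase_info_dirty) {
                // ... rewrite the rebase info ...
                self.rebase_info_dirty = false;
            }
        }
    };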
---
 src/link/MachO.zig           | 1 +
 src/link/MachO/TextBlock.zig | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index dfd70c4492..cd5a0218d1 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -2312,6 +2312,7 @@ fn resolveSymbolsInObject(
             .local_sym_index = local_sym_index,
             .file = object_id,
         };
+        self.export_info_dirty = true;
     } else if (symbolIsTentative(sym)) {
         // Symbol is a tentative definition.
         const resolv = self.symbol_resolver.getPtr(n_strx) orelse {
diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig
index 160ba5cd8c..af9bf5b3eb 100644
--- a/src/link/MachO/TextBlock.zig
+++ b/src/link/MachO/TextBlock.zig
@@ -844,6 +844,9 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R
             .sect = context.macho_file.got_section_index.?,
         };
         _ = try context.macho_file.allocateAtom(atom, match);
+        // TODO don't need both at once
+        context.macho_file.rebase_info_dirty = true;
+        context.macho_file.binding_info_dirty = true;
     } else if (parsed_rel.payload == .unsigned) {
         switch (parsed_rel.where) {
             .undef => {

From 80e1c244b6c9d5f3e855878d92ecc09dc8eb970a Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Fri, 3 Sep 2021 17:12:39 +0200
Subject: [PATCH 49/78] macho: dyld info subsections need to follow in strict order

MachO, why are you doing this to me?
---
 src/link/MachO.zig           | 364 +++++++++++------------------------
 src/link/MachO/TextBlock.zig |   4 +-
 2 files changed, 116 insertions(+), 252 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index cd5a0218d1..c1fb6977c5 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -162,10 +162,7 @@ stubs_map: std.AutoArrayHashMapUnmanaged(u32, *TextBlock) = .{},
 error_flags: File.ErrorFlags = File.ErrorFlags{},
 
 load_commands_dirty: bool = false,
-rebase_info_dirty: bool = false,
-binding_info_dirty: bool = false,
-lazy_binding_info_dirty: bool = false,
-export_info_dirty: bool = false,
+dyld_info_dirty: bool = false,
 
 strtab_dirty: bool = false,
 strtab_needs_relocation: bool = false,
@@ -814,10 +811,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
     defer tracy.end();
 
     try self.setEntryPoint();
-    try self.writeRebaseInfoTable();
-    try self.writeBindInfoTable();
-    try self.writeLazyBindInfoTable();
-    try self.writeExportInfo();
+    try self.writeDyldInfoData();
     try self.writeAllGlobalAndUndefSymbols();
     try self.writeIndirectSymbolTable();
     try self.writeStringTable();
@@ -849,10 +843,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
     }
 
     assert(!self.load_commands_dirty);
-    assert(!self.rebase_info_dirty);
-    assert(!self.binding_info_dirty);
-    assert(!self.lazy_binding_info_dirty);
-    assert(!self.export_info_dirty);
+    assert(!self.dyld_info_dirty);
     assert(!self.strtab_dirty);
     assert(!self.strtab_needs_relocation);
 
@@ -2111,7 +2102,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym
         .local_sym_index = lazy_binding_sym_index,
         .offset = 0,
     });
-    self.lazy_binding_info_dirty = true;
+    self.dyld_info_dirty = true;
 
     return atom;
 }
@@ -2312,7 +2303,7 @@ fn resolveSymbolsInObject(
             .local_sym_index = local_sym_index,
             .file = object_id,
         };
-        self.export_info_dirty = true;
+        self.dyld_info_dirty = true;
     } else if (symbolIsTentative(sym)) {
         // Symbol is a tentative definition.
const resolv = self.symbol_resolver.getPtr(n_strx) orelse { @@ -2647,7 +2638,7 @@ fn resolveDyldStubBinder(self: *MachO) !void { .sect = self.got_section_index.?, }; _ = try self.allocateAtom(atom, match); - self.binding_info_dirty = true; + self.dyld_info_dirty = true; } fn parseTextBlocks(self: *MachO) !void { @@ -2946,7 +2937,7 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { }; const got_atom = try self.createGotAtom(key); try self.got_entries_map.put(self.base.allocator, key, got_atom); - self.rebase_info_dirty = true; + self.dyld_info_dirty = true; } pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { @@ -3266,7 +3257,7 @@ pub fn updateDeclExports( const name_str_index = try self.makeString(exp_name); const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { _ = self.globals.addOneAssumeCapacity(); - self.export_info_dirty = true; + self.dyld_info_dirty = true; break :blk @intCast(u32, self.globals.items.len - 1); }; self.globals.items[i] = .{ @@ -3648,29 +3639,34 @@ pub fn populateMissingMetadata(self: *MachO) !void { }, }); - const dyld = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - // Preallocate rebase, binding, lazy binding info, and export info. - const expected_size = 48; // TODO This is totally random. - const rebase_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found rebase info free space 0x{x} to 0x{x}", .{ rebase_off, rebase_off + expected_size }); - dyld.rebase_off = @intCast(u32, rebase_off); - dyld.rebase_size = expected_size; + const dyld = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + const subsection_size = 128; // TODO this is totally random + const needed_size = 4 * subsection_size; + const offset = self.findFreeSpaceLinkedit(needed_size, 1, null); - const bind_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found binding info free space 0x{x} to 0x{x}", .{ bind_off, bind_off + expected_size }); - dyld.bind_off = @intCast(u32, bind_off); - dyld.bind_size = expected_size; + const rebase_off = @intCast(u32, offset); + log.debug("found rebase info free space 0x{x} to 0x{x}", .{ rebase_off, rebase_off + subsection_size }); + dyld.rebase_off = rebase_off; + dyld.rebase_size = subsection_size; - const lazy_bind_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found lazy binding info free space 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + expected_size }); - dyld.lazy_bind_off = @intCast(u32, lazy_bind_off); - dyld.lazy_bind_size = expected_size; + const bind_off = rebase_off + subsection_size; + log.debug("found binding info free space 0x{x} to 0x{x}", .{ bind_off, bind_off + subsection_size }); + dyld.bind_off = bind_off; + dyld.bind_size = subsection_size; - const export_off = self.findFreeSpaceLinkedit(expected_size, 1, null); - log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + expected_size }); - dyld.export_off = @intCast(u32, export_off); - dyld.export_size = expected_size; + const lazy_bind_off = bind_off + subsection_size; + log.debug("found lazy binding info free space 0x{x} to 0x{x}", .{ + lazy_bind_off, + lazy_bind_off + subsection_size, + }); + dyld.lazy_bind_off = lazy_bind_off; + dyld.lazy_bind_size = subsection_size; + + const export_off = lazy_bind_off + subsection_size; + log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + subsection_size }); + dyld.export_off = export_off; + 
dyld.export_size = subsection_size; self.load_commands_dirty = true; } @@ -4097,10 +4093,6 @@ fn allocatedSizeLinkedit(self: *MachO, start: u64) u64 { if (self.dyld_info_cmd_index) |idx| { const dyld_info = self.load_commands.items[idx].DyldInfoOnly; if (dyld_info.rebase_off > start and dyld_info.rebase_off < min_pos) min_pos = dyld_info.rebase_off; - if (dyld_info.bind_off > start and dyld_info.bind_off < min_pos) min_pos = dyld_info.bind_off; - if (dyld_info.weak_bind_off > start and dyld_info.weak_bind_off < min_pos) min_pos = dyld_info.weak_bind_off; - if (dyld_info.lazy_bind_off > start and dyld_info.lazy_bind_off < min_pos) min_pos = dyld_info.lazy_bind_off; - if (dyld_info.export_off > start and dyld_info.export_off < min_pos) min_pos = dyld_info.export_off; } if (self.function_starts_cmd_index) |idx| { @@ -4141,27 +4133,14 @@ fn detectAllocCollisionLinkedit(self: *MachO, start: u64, size: u64) ?u64 { // __LINKEDIT is a weird segment where sections get their own load commands so we // special-case it. - if (self.dyld_info_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; + if (self.dyld_info_cmd_index) |idx| { const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - if (checkForCollision(start, end, dyld_info.rebase_off, dyld_info.rebase_size)) |pos| { - return pos; - } - // Binding info - if (checkForCollision(start, end, dyld_info.bind_off, dyld_info.bind_size)) |pos| { - return pos; - } - // Weak binding info - if (checkForCollision(start, end, dyld_info.weak_bind_off, dyld_info.weak_bind_size)) |pos| { - return pos; - } - // Lazy binding info - if (checkForCollision(start, end, dyld_info.lazy_bind_off, dyld_info.lazy_bind_size)) |pos| { - return pos; - } - // Export info - if (checkForCollision(start, end, dyld_info.export_off, dyld_info.export_size)) |pos| { - return pos; + const offset = dyld_info.rebase_off; + const actual_size = dyld_info.export_off + dyld_info.export_size - offset; + const increased_size = padToIdeal(actual_size); + const test_end = offset + increased_size; + if (end > offset and start < test_end) { + return test_end; } } @@ -4483,145 +4462,43 @@ fn writeCodeSignature(self: *MachO) !void { try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); } -fn writeExportInfo(self: *MachO) !void { - if (!self.export_info_dirty) return; - if (self.globals.items.len == 0) return; +fn writeDyldInfoData(self: *MachO) !void { + if (!self.dyld_info_dirty) return; const tracy = trace(@src()); defer tracy.end(); - var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); - - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const base_address = text_segment.inner.vmaddr; - - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
- log.debug("writing export trie", .{}); - - for (self.globals.items) |sym| { - const sym_name = self.getString(sym.n_strx); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - - try trie.put(self.base.allocator, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - try trie.finalize(self.base.allocator); - - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, trie.size)); - defer self.base.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - const nwritten = try trie.write(stream.writer()); - assert(nwritten == trie.size); - - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.export_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - - if (needed_size > allocated_size) { - dyld_info.export_off = 0; - dyld_info.export_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. - } - dyld_info.export_size = @intCast(u32, needed_size); - log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); - self.load_commands_dirty = true; - self.export_info_dirty = false; -} - -fn writeRebaseInfoTable(self: *MachO) !void { - if (!self.rebase_info_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); + var rebase_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer rebase_pointers.deinit(); + var bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer bind_pointers.deinit(); + var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + defer lazy_bind_pointers.deinit(); { var it = self.blocks.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; + var atom: *TextBlock = entry.value_ptr.*; if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable const seg = self.load_commands.items[match.seg].Segment; while (true) { - const sym = self.locals.items[block.local_sym_index]; + const sym = self.locals.items[atom.local_sym_index]; const base_offset = sym.n_value - seg.inner.vmaddr; - for (block.rebases.items) |offset| { - try pointers.append(.{ + for (atom.rebases.items) |offset| { + try rebase_pointers.append(.{ .offset = base_offset + offset, .segment_id = match.seg, }); } - if (block.prev) |prev| { - block = prev; - } else break; - } - } - } - - const size = try bind.rebaseInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeRebaseInfo(pointers.items, stream.writer()); - - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.rebase_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - - if (needed_size > allocated_size) { - dyld_info.rebase_off = 0; - dyld_info.rebase_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. - } - - dyld_info.rebase_size = @intCast(u32, needed_size); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); - self.load_commands_dirty = true; - self.rebase_info_dirty = false; -} - -fn writeBindInfoTable(self: *MachO) !void { - if (!self.binding_info_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - { - var it = self.blocks.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var block: *TextBlock = entry.value_ptr.*; - - if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable - - const seg = self.load_commands.items[match.seg].Segment; - - while (true) { - const sym = self.locals.items[block.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (block.bindings.items) |binding| { + for (atom.bindings.items) |binding| { const bind_sym = self.undefs.items[binding.local_sym_index]; - try pointers.append(.{ + try bind_pointers.append(.{ .offset = binding.offset + base_offset, .segment_id = match.seg, .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), @@ -4629,97 +4506,86 @@ fn writeBindInfoTable(self: *MachO) !void { }); } - if (block.prev) |prev| { - block = prev; + for (atom.lazy_bindings.items) |binding| { + const bind_sym = self.undefs.items[binding.local_sym_index]; + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), + .name = self.getString(bind_sym.n_strx), + }); + } + + if (atom.prev) |prev| { + atom = prev; } else break; } } } - const size = try bind.bindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); + var trie: Trie = .{}; + defer trie.deinit(self.base.allocator); - var stream = std.io.fixedBufferStream(buffer); - try bind.writeBindInfo(pointers.items, stream.writer()); + { + // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. + log.debug("writing export trie", .{}); + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const base_address = text_segment.inner.vmaddr; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.bind_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); + for (self.globals.items) |sym| { + const sym_name = self.getString(sym.n_strx); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - if (needed_size > allocated_size) { - dyld_info.bind_off = 0; - dyld_info.bind_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. 
- } - - dyld_info.bind_size = @intCast(u32, needed_size); - log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); - - try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); - self.load_commands_dirty = true; - self.binding_info_dirty = false; -} - -fn writeLazyBindInfoTable(self: *MachO) !void { - if (!self.lazy_binding_info_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - var pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); - defer pointers.deinit(); - - if (self.la_symbol_ptr_section_index) |sect| blk: { - var atom = self.blocks.get(.{ - .seg = self.data_segment_cmd_index.?, - .sect = sect, - }) orelse break :blk; - const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; - - while (true) { - const sym = self.locals.items[atom.local_sym_index]; - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (atom.lazy_bindings.items) |binding| { - const bind_sym = self.undefs.items[binding.local_sym_index]; - try pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = self.data_segment_cmd_index.?, - .dylib_ordinal = @divExact(bind_sym.n_desc, macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - }); - } - if (atom.prev) |prev| { - atom = prev; - } else break; + try trie.put(self.base.allocator, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); } + + try trie.finalize(self.base.allocator); } - const size = try bind.lazyBindInfoSize(pointers.items); - var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size)); - defer self.base.allocator.free(buffer); - - var stream = std.io.fixedBufferStream(buffer); - try bind.writeLazyBindInfo(pointers.items, stream.writer()); - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.lazy_bind_off); - const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); + const allocated_size = self.allocatedSizeLinkedit(dyld_info.rebase_off); + const rebase_size = @intCast(u32, try bind.rebaseInfoSize(rebase_pointers.items)); + const bind_size = @intCast(u32, try bind.bindInfoSize(bind_pointers.items)); + const lazy_bind_size = @intCast(u32, try bind.lazyBindInfoSize(lazy_bind_pointers.items)); + const export_size = @intCast(u32, trie.size); + const total_size = rebase_size + bind_size + lazy_bind_size + export_size; + const needed_size = mem.alignForwardGeneric(u64, total_size, @alignOf(u64)); if (needed_size > allocated_size) { - dyld_info.lazy_bind_off = 0; - dyld_info.lazy_bind_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - // TODO this might require relocating all following LC_DYLD_INFO_ONLY sections too. 
+        dyld_info.rebase_off = 0;
+        dyld_info.rebase_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null));
     }
 
-    dyld_info.lazy_bind_size = @intCast(u32, needed_size);
-    log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size });
+    dyld_info.rebase_size = rebase_size;
+    dyld_info.bind_off = dyld_info.rebase_off + dyld_info.rebase_size;
+    dyld_info.bind_size = bind_size;
+    dyld_info.lazy_bind_off = dyld_info.bind_off + dyld_info.bind_size;
+    dyld_info.lazy_bind_size = lazy_bind_size;
+    dyld_info.export_off = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size;
+    dyld_info.export_size = export_size;
 
-    try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
-    try self.populateLazyBindOffsetsInStubHelper(buffer);
+    var buffer = try self.base.allocator.alloc(u8, needed_size);
+    defer self.base.allocator.free(buffer);
+    mem.set(u8, buffer, 0);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    const writer = stream.writer();
+
+    try bind.writeRebaseInfo(rebase_pointers.items, writer);
+    try bind.writeBindInfo(bind_pointers.items, writer);
+    try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer);
+    _ = try trie.write(writer);
+
+    log.debug("writing dyld info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + needed_size });
+
+    try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off);
+    try self.populateLazyBindOffsetsInStubHelper(buffer[rebase_size + bind_size ..][0..lazy_bind_size]);
     self.load_commands_dirty = true;
-    self.lazy_binding_info_dirty = false;
+    self.dyld_info_dirty = false;
 }
 
 fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void {
diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig
index af9bf5b3eb..e6ceed9c55 100644
--- a/src/link/MachO/TextBlock.zig
+++ b/src/link/MachO/TextBlock.zig
@@ -844,9 +844,7 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R
             .sect = context.macho_file.got_section_index.?,
         };
         _ = try context.macho_file.allocateAtom(atom, match);
-        // TODO don't need both at once
-        context.macho_file.rebase_info_dirty = true;
-        context.macho_file.binding_info_dirty = true;
+        context.macho_file.dyld_info_dirty = true;
     } else if (parsed_rel.payload == .unsigned) {
         switch (parsed_rel.where) {
             .undef => {

From 7e87f93e068af192a3c39bdeebfe8e9b612311b0 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Sat, 4 Sep 2021 22:34:16 +0200
Subject: [PATCH 50/78] macho: unfortunately, LINKEDIT commands NEED to be in order

Otherwise, Apple's tooling goes mental and reports that the executable
is malformed/fails strict validation. We absolutely have to get it
right to support tools such as `codesign`, which are required to
successfully launch an app on an iOS device, for instance.

When Zig matures enough that we can ditch any Apple tooling and still
be able to successfully codesign for iOS and other platforms, we can
revisit this area. Until then, however, we are stuck having to rewrite
the LINKEDIT segment at every update run of the self-hosted compiler.
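
As a minimal sketch of what this forces on us (hypothetical names and a
simplified signature, not the actual MachO.zig implementation), every
__LINKEDIT blob has to be chained directly after its predecessor, in
the fixed order spelled out below:

    const std = @import("std");
    const macho = std.macho;

    /// Sketch only: lay the __LINKEDIT contents out back-to-back so
    /// each blob starts exactly where the previous one ends.
    fn layoutLinkedit(
        base_off: u32,
        dyld_info: *macho.dyld_info_command,
        symtab: *macho.symtab_command,
        dysymtab: *macho.dysymtab_command,
    ) void {
        var off = base_off;
        dyld_info.rebase_off = off;
        off += dyld_info.rebase_size;
        dyld_info.bind_off = off;
        off += dyld_info.bind_size;
        dyld_info.weak_bind_off = off;
        off += dyld_info.weak_bind_size;
        dyld_info.lazy_bind_off = off;
        off += dyld_info.lazy_bind_size;
        dyld_info.export_off = off;
        off += dyld_info.export_size;
        symtab.symoff = off;
        off += symtab.nsyms * @sizeOf(macho.nlist_64);
        dysymtab.indirectsymoff = off;
        off += dysymtab.nindirectsyms * @sizeOf(u32);
        symtab.stroff = off;
        // The string table runs to off + symtab.strsize; the code
        // signature, if required, must be written after everything above.
    }
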
FYI, the strict layout for the MachO binary apparently is (please, read this with a pinch of salt as this is inferred by me): * __TEXT segment * __DATA_CONST segment * __DATA segment * __LINKEDIT segment * dyld info (rebase, bind, weak bind, lazy bind, export) * symbol table * dynamic symbol table * string table * code signature (if expected) --- src/link/MachO.zig | 914 +++++++++++++---------------------- src/link/MachO/TextBlock.zig | 1 - 2 files changed, 344 insertions(+), 571 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c1fb6977c5..4ee81e7c43 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -162,10 +162,6 @@ stubs_map: std.AutoArrayHashMapUnmanaged(u32, *TextBlock) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, load_commands_dirty: bool = false, -dyld_info_dirty: bool = false, - -strtab_dirty: bool = false, -strtab_needs_relocation: bool = false, has_dices: bool = false, has_stabs: bool = false, @@ -368,13 +364,12 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio .n_desc = 0, .n_value = 0, }); + try self.strtab.append(allocator, 0); try self.populateMissingMetadata(); - try self.writeLocalSymbol(0); if (self.d_sym) |*ds| { try ds.populateMissingMetadata(allocator); - try ds.writeLocalSymbol(0); } return self; @@ -553,6 +548,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { .n_desc = 0, .n_value = 0, }); + try self.strtab.append(self.base.allocator, 0); } // Positional arguments to the linker such as object files and static archives. @@ -784,7 +780,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { } } try self.writeAtoms(); - try self.writeDices(); try self.flushModule(comp); } @@ -792,11 +787,11 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { - log.warn("failed to save linking hash digest file: {s}", .{@errorName(err)}); + log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); }; // Again failure here only means an unnecessary cache miss. man.writeManifest() catch |err| { - log.warn("failed to write cache manifest when linking: {s}", .{@errorName(err)}); + log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); }; // We hang on to this lock so that the output file path can be used without // other processes clobbering it. @@ -811,11 +806,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { defer tracy.end(); try self.setEntryPoint(); - try self.writeDyldInfoData(); - try self.writeAllGlobalAndUndefSymbols(); - try self.writeIndirectSymbolTable(); - try self.writeStringTable(); - try self.updateLinkeditSegmentSizes(); + try self.writeLinkeditSegment(); if (self.d_sym) |*ds| { // Flush debug symbols bundle. 
@@ -843,9 +834,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { } assert(!self.load_commands_dirty); - assert(!self.dyld_info_dirty); - assert(!self.strtab_dirty); - assert(!self.strtab_needs_relocation); if (self.requires_adhoc_codesig) { try self.writeCodeSignature(); // code signing always comes last @@ -1769,7 +1757,6 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); - try self.writeLocalSymbol(atom.local_sym_index); } fn allocateLocalSymbols(self: *MachO, match: MatchingSection, old_base_addr: u64) !void { @@ -1830,7 +1817,7 @@ fn writeAtoms(self: *MachO) !void { pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*TextBlock { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ - .n_strx = try self.makeString("got_entry"), + .n_strx = try self.makeString("l_zld_got_entry"), .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, @@ -1866,7 +1853,7 @@ pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*TextBlock { fn createDyldPrivateAtom(self: *MachO) !*TextBlock { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ - .n_strx = try self.makeString("dyld_private"), + .n_strx = try self.makeString("l_zld_dyld_private"), .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, @@ -1890,7 +1877,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !*TextBlock { }; const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ - .n_strx = try self.makeString("stub_preamble"), + .n_strx = try self.makeString("l_zld_stub_preamble"), .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, @@ -2023,7 +2010,7 @@ pub fn createStubHelperAtom(self: *MachO) !*TextBlock { }; const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ - .n_strx = try self.makeString("stub_in_stub_helper"), + .n_strx = try self.makeString("l_zld_stub_in_stub_helper"), .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, @@ -2078,7 +2065,7 @@ pub fn createStubHelperAtom(self: *MachO) !*TextBlock { pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym_index: u32) !*TextBlock { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ - .n_strx = try self.makeString("lazy_ptr"), + .n_strx = try self.makeString("l_zld_lazy_ptr"), .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, @@ -2102,7 +2089,6 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym .local_sym_index = lazy_binding_sym_index, .offset = 0, }); - self.dyld_info_dirty = true; return atom; } @@ -2120,7 +2106,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*TextBlock { }; const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ - .n_strx = try self.makeString("stub"), + .n_strx = try self.makeString("l_zld_stub"), .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, @@ -2303,7 +2289,6 @@ fn resolveSymbolsInObject( .local_sym_index = local_sym_index, .file = object_id, }; - self.dyld_info_dirty = true; } else if (symbolIsTentative(sym)) { // Symbol is a tentative definition. 
const resolv = self.symbol_resolver.getPtr(n_strx) orelse { @@ -2638,7 +2623,6 @@ fn resolveDyldStubBinder(self: *MachO) !void { .sect = self.got_section_index.?, }; _ = try self.allocateAtom(atom, match); - self.dyld_info_dirty = true; } fn parseTextBlocks(self: *MachO) !void { @@ -2658,27 +2642,21 @@ fn addDataInCodeLC(self: *MachO) !void { .datasize = 0, }, }); - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; - const needed_size = 10 * @sizeOf(macho.data_in_code_entry); - const dataoff = self.findFreeSpaceLinkedit(needed_size, @alignOf(macho.data_in_code_entry), null); - log.debug("found data-in-code free space 0x{x} to 0x{x}", .{ dataoff, dataoff + needed_size }); - dice_cmd.dataoff = @intCast(u32, dataoff); - dice_cmd.datasize = needed_size; self.load_commands_dirty = true; } fn addCodeSignatureLC(self: *MachO) !void { - if (self.code_signature_cmd_index == null and self.requires_adhoc_codesig) { - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - } + if (self.code_signature_cmd_index != null or !self.requires_adhoc_codesig) return; + self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + self.load_commands_dirty = true; } fn addRpathLCs(self: *MachO, rpaths: []const []const u8) !void { @@ -2697,6 +2675,7 @@ fn addRpathLCs(self: *MachO, rpaths: []const []const u8) !void { mem.set(u8, rpath_cmd.data, 0); mem.copy(u8, rpath_cmd.data, rpath); try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); + self.load_commands_dirty = true; } } @@ -2713,6 +2692,7 @@ fn addLoadDylibLCs(self: *MachO) !void { ); errdefer dylib_cmd.deinit(self.base.allocator); try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.load_commands_dirty = true; } } @@ -2937,7 +2917,6 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { }; const got_atom = try self.createGotAtom(key); try self.got_entries_map.put(self.base.allocator, key, got_atom); - self.dyld_info_dirty = true; } pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { @@ -3138,10 +3117,6 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 symbol.n_type = macho.N_SECT; symbol.n_sect = @intCast(u8, self.text_section_index.?) 
+ 1; symbol.n_desc = 0; - - try self.writeLocalSymbol(decl.link.macho.local_sym_index); - if (self.d_sym) |*ds| - try ds.writeLocalSymbol(decl.link.macho.local_sym_index); } else { const decl_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)}); defer self.base.allocator.free(decl_name); @@ -3168,10 +3143,6 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 .seg = self.data_const_segment_cmd_index.?, .sect = self.got_section_index.?, }); - - try self.writeLocalSymbol(decl.link.macho.local_sym_index); - if (self.d_sym) |*ds| - try ds.writeLocalSymbol(decl.link.macho.local_sym_index); } return symbol; @@ -3257,7 +3228,6 @@ pub fn updateDeclExports( const name_str_index = try self.makeString(exp_name); const i = if (self.globals_free_list.popOrNull()) |i| i else blk: { _ = self.globals.addOneAssumeCapacity(); - self.dyld_info_dirty = true; break :blk @intCast(u32, self.globals.items.len - 1); }; self.globals.items[i] = .{ @@ -3347,7 +3317,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { const program_code_size_hint = self.base.options.program_code_size_hint; // const program_code_size_hint = 10; const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; - const ideal_size = self.header_pad + program_code_size_hint + got_size_hint; + const ideal_size = self.header_pad + (program_code_size_hint + got_size_hint) * 5; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); @@ -3441,7 +3411,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.data_const_segment_cmd_index == null) { self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); const address_and_offset = self.nextSegmentAddressAndOffset(); - const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint * 1000; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ @@ -3482,7 +3452,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.data_segment_cmd_index == null) { self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); const address_and_offset = self.nextSegmentAddressAndOffset(); - const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; + const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint * 1000; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); @@ -3621,7 +3591,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.dyld_info_cmd_index == null) { self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ .DyldInfoOnly = .{ .cmd = macho.LC_DYLD_INFO_ONLY, @@ -3638,42 +3607,11 @@ pub fn populateMissingMetadata(self: *MachO) !void { .export_size = 0, }, }); - - // Preallocate rebase, binding, lazy binding info, and export info. 
- const dyld = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const subsection_size = 128; // TODO this is totally random - const needed_size = 4 * subsection_size; - const offset = self.findFreeSpaceLinkedit(needed_size, 1, null); - - const rebase_off = @intCast(u32, offset); - log.debug("found rebase info free space 0x{x} to 0x{x}", .{ rebase_off, rebase_off + subsection_size }); - dyld.rebase_off = rebase_off; - dyld.rebase_size = subsection_size; - - const bind_off = rebase_off + subsection_size; - log.debug("found binding info free space 0x{x} to 0x{x}", .{ bind_off, bind_off + subsection_size }); - dyld.bind_off = bind_off; - dyld.bind_size = subsection_size; - - const lazy_bind_off = bind_off + subsection_size; - log.debug("found lazy binding info free space 0x{x} to 0x{x}", .{ - lazy_bind_off, - lazy_bind_off + subsection_size, - }); - dyld.lazy_bind_off = lazy_bind_off; - dyld.lazy_bind_size = subsection_size; - - const export_off = lazy_bind_off + subsection_size; - log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + subsection_size }); - dyld.export_off = export_off; - dyld.export_size = subsection_size; - self.load_commands_dirty = true; } if (self.symtab_cmd_index == null) { self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ .Symtab = .{ .cmd = macho.LC_SYMTAB, @@ -3684,35 +3622,11 @@ pub fn populateMissingMetadata(self: *MachO) !void { .strsize = 0, }, }); - - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - - const symtab_size = self.base.options.symbol_count_hint * @sizeOf(macho.nlist_64); - const symtab_off = self.findFreeSpaceLinkedit(symtab_size, @sizeOf(macho.nlist_64), null); - log.debug("found symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); - symtab.symoff = @intCast(u32, symtab_off); - symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint); - - try self.strtab.append(self.base.allocator, 0); - const strtab_size = self.strtab.items.len; - const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1, symtab_off); - log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size }); - symtab.stroff = @intCast(u32, strtab_off); - symtab.strsize = @intCast(u32, strtab_size); - self.load_commands_dirty = true; - self.strtab_dirty = true; } if (self.dysymtab_cmd_index == null) { self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - - // Preallocate space for indirect symbol table. - const indsymtab_size = self.base.options.symbol_count_hint * @sizeOf(u64); // Each entry is just a u64. 
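
A note on the sizing in the removed line above: Mach-O indirect symbol table entries are 32-bit indices into the symbol table, not u64s. The collision checks elsewhere in this file and the rewritten writeSymbolTable later in this patch both size the table as nindirectsyms * @sizeOf(u32), so the preallocation removed here over-reserved by a factor of two. The matching sizing would have been:

    // Each indirect symbol table entry is a u32 index into the symtab.
    const indsymtab_size = self.base.options.symbol_count_hint * @sizeOf(u32);
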
- const indsymtab_off = self.findFreeSpaceLinkedit(indsymtab_size, @sizeOf(u64), null); - - log.debug("found indirect symbol table free space 0x{x} to 0x{x}", .{ indsymtab_off, indsymtab_off + indsymtab_size }); - try self.load_commands.append(self.base.allocator, .{ .Dysymtab = .{ .cmd = macho.LC_DYSYMTAB, @@ -3729,8 +3643,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { .nmodtab = 0, .extrefsymoff = 0, .nextrefsyms = 0, - .indirectsymoff = @intCast(u32, indsymtab_off), - .nindirectsyms = @intCast(u32, self.base.options.symbol_count_hint), + .indirectsymoff = 0, + .nindirectsyms = 0, .extreloff = 0, .nextrel = 0, .locreloff = 0, @@ -4084,387 +3998,7 @@ fn nextSegmentAddressAndOffset(self: *MachO) NextSegmentAddressAndOffset { }; } -fn allocatedSizeLinkedit(self: *MachO, start: u64) u64 { - assert(start > 0); - var min_pos: u64 = std.math.maxInt(u64); - - // __LINKEDIT is a weird segment where sections get their own load commands so we - // special-case it. - if (self.dyld_info_cmd_index) |idx| { - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - if (dyld_info.rebase_off > start and dyld_info.rebase_off < min_pos) min_pos = dyld_info.rebase_off; - } - - if (self.function_starts_cmd_index) |idx| { - const fstart = self.load_commands.items[idx].LinkeditData; - if (fstart.dataoff > start and fstart.dataoff < min_pos) min_pos = fstart.dataoff; - } - - if (self.data_in_code_cmd_index) |idx| { - const dic = self.load_commands.items[idx].LinkeditData; - if (dic.dataoff > start and dic.dataoff < min_pos) min_pos = dic.dataoff; - } - - if (self.dysymtab_cmd_index) |idx| { - const dysymtab = self.load_commands.items[idx].Dysymtab; - if (dysymtab.indirectsymoff > start and dysymtab.indirectsymoff < min_pos) min_pos = dysymtab.indirectsymoff; - // TODO Handle more dynamic symbol table sections. - } - - if (self.symtab_cmd_index) |idx| { - const symtab = self.load_commands.items[idx].Symtab; - if (symtab.symoff > start and symtab.symoff < min_pos) min_pos = symtab.symoff; - if (symtab.stroff > start and symtab.stroff < min_pos) min_pos = symtab.stroff; - } - - return min_pos - start; -} -inline fn checkForCollision(start: u64, end: u64, off: u64, size: u64) ?u64 { - const increased_size = padToIdeal(size); - const test_end = off + increased_size; - if (end > off and start < test_end) { - return test_end; - } - return null; -} - -fn detectAllocCollisionLinkedit(self: *MachO, start: u64, size: u64) ?u64 { - const end = start + padToIdeal(size); - - // __LINKEDIT is a weird segment where sections get their own load commands so we - // special-case it. 
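
The pair of routines deleted here, detectAllocCollisionLinkedit and findFreeSpaceLinkedit just below, formed a first-fit allocator over __LINKEDIT: probe a candidate offset, and whenever an existing blob overlaps it, restart the search just past the collision. Reduced to its core loop, with a hypothetical detect callback standing in for the collision probe:

    const std = @import("std");

    // First-fit scan: bump the candidate offset past each reported collision
    // until detect() finds no overlap. `detect` is hypothetical here; in the
    // real code it was detectAllocCollisionLinkedit.
    fn findFreeSpace(detect: fn (u64, u64) ?u64, size: u64, min_alignment: u16, start: u64) u64 {
        var offset = start;
        while (detect(offset, size)) |collision_end| {
            offset = std.mem.alignForwardGeneric(u64, collision_end, min_alignment);
        }
        return offset;
    }
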
- if (self.dyld_info_cmd_index) |idx| { - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - const offset = dyld_info.rebase_off; - const actual_size = dyld_info.export_off + dyld_info.export_size - offset; - const increased_size = padToIdeal(actual_size); - const test_end = offset + increased_size; - if (end > offset and start < test_end) { - return test_end; - } - } - - if (self.function_starts_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const fstart = self.load_commands.items[idx].LinkeditData; - if (checkForCollision(start, end, fstart.dataoff, fstart.datasize)) |pos| { - return pos; - } - } - - if (self.data_in_code_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dic = self.load_commands.items[idx].LinkeditData; - if (checkForCollision(start, end, dic.dataoff, dic.datasize)) |pos| { - return pos; - } - } - - if (self.dysymtab_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const dysymtab = self.load_commands.items[idx].Dysymtab; - // Indirect symbol table - const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); - if (checkForCollision(start, end, dysymtab.indirectsymoff, nindirectsize)) |pos| { - return pos; - } - // TODO Handle more dynamic symbol table sections. - } - - if (self.symtab_cmd_index) |idx| outer: { - if (self.load_commands.items.len == idx) break :outer; - const symtab = self.load_commands.items[idx].Symtab; - // Symbol table - const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); - if (checkForCollision(start, end, symtab.symoff, symsize)) |pos| { - return pos; - } - // String table - if (checkForCollision(start, end, symtab.stroff, symtab.strsize)) |pos| { - return pos; - } - } - - return null; -} - -fn findFreeSpaceLinkedit(self: *MachO, object_size: u64, min_alignment: u16, start: ?u64) u64 { - const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - var st: u64 = start orelse linkedit.inner.fileoff; - while (self.detectAllocCollisionLinkedit(st, object_size)) |item_end| { - st = mem.alignForwardGeneric(u64, item_end, min_alignment); - } - return st; -} - -fn relocateSymbolTable(self: *MachO) !void { - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const nlocals = self.locals.items.len; - const nglobals = self.globals.items.len; - const nundefs = self.undefs.items.len; - const nsyms = nlocals + nglobals + nundefs; - - if (symtab.nsyms < nsyms) { - const needed_size = nsyms * @sizeOf(macho.nlist_64); - if (needed_size > self.allocatedSizeLinkedit(symtab.symoff)) { - // Move the entire symbol table to a new location - const new_symoff = self.findFreeSpaceLinkedit(needed_size, @alignOf(macho.nlist_64), null); - const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); - - log.debug("relocating symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ - symtab.symoff, - symtab.symoff + existing_size, - new_symoff, - new_symoff + existing_size, - }); - - // TODO copyRangeAll doesn't seem to extend the file beyond its allocated size - try self.base.file.?.pwriteAll(&[_]u8{0}, new_symoff + existing_size - 1); - const amt = try self.base.file.?.copyRangeAll( - symtab.symoff, - self.base.file.?, - new_symoff, - existing_size, - ); - if (amt != existing_size) return error.InputOutput; - symtab.symoff = @intCast(u32, new_symoff); - self.strtab_needs_relocation = true; - } - symtab.nsyms = @intCast(u32, nsyms); - self.load_commands_dirty = true; - } -} - -fn writeLocalSymbol(self: 
*MachO, index: usize) !void {
-    const tracy = trace(@src());
-    defer tracy.end();
-    try self.relocateSymbolTable();
-    const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    const off = symtab.symoff + @sizeOf(macho.nlist_64) * index;
-    const sym = self.locals.items[index];
-    log.debug("writing local symbol {s}: {} at 0x{x}", .{ self.getString(sym.n_strx), sym, off });
-    try self.base.file.?.pwriteAll(mem.asBytes(&sym), off);
-}
-
-fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
-    const tracy = trace(@src());
-    defer tracy.end();
-
-    try self.relocateSymbolTable();
-    const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    const nlocals = self.locals.items.len;
-    const nglobals = self.globals.items.len;
-    const nundefs = self.undefs.items.len;
-
-    const locals_off = symtab.symoff;
-    const locals_size = nlocals * @sizeOf(macho.nlist_64);
-
-    const globals_off = locals_off + locals_size;
-    const globals_size = nglobals * @sizeOf(macho.nlist_64);
-    log.debug("writing global symbols from 0x{x} to 0x{x}", .{ globals_off, globals_size + globals_off });
-    try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), globals_off);
-
-    const undefs_off = globals_off + globals_size;
-    const undefs_size = nundefs * @sizeOf(macho.nlist_64);
-    log.debug("writing undef symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off });
-    try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off);
-
-    // Update dynamic symbol table.
-    const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
-    dysymtab.nlocalsym = @intCast(u32, nlocals);
-    dysymtab.iextdefsym = @intCast(u32, nlocals);
-    dysymtab.nextdefsym = @intCast(u32, nglobals);
-    dysymtab.iundefsym = @intCast(u32, nlocals + nglobals);
-    dysymtab.nundefsym = @intCast(u32, nundefs);
-    self.load_commands_dirty = true;
-}
-
-fn writeIndirectSymbolTable(self: *MachO) !void {
-    // TODO figure out a way to avoid rewriting the table every time when
-    // no new undefs are added.
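
For readers new to this structure: __stubs, __got and __la_symbol_ptr all share the single indirect symbol table, and each section's reserved1 header field records the index at which its run of entries begins. Stub symbols appear twice, once for __stubs and once for __la_symbol_ptr, which is where the nstubs * 2 in the body below comes from. A worked example with made-up counts:

    const nstubs: u32 = 4; // hypothetical
    const ngot_entries: u32 = 6; // hypothetical
    // stubs.reserved1         = 0                     entries [0, 4)
    // got.reserved1           = nstubs                entries [4, 10)
    // la_symbol_ptr.reserved1 = nstubs + ngot_entries entries [10, 14)
    const nindirectsyms = nstubs * 2 + ngot_entries; // 14 u32 entries in all
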
- const tracy = trace(@src()); - defer tracy.end(); - - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = &text_segment.sections.items[self.stubs_section_index.?]; - const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; - const got = &data_const_seg.sections.items[self.got_section_index.?]; - const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - - const nstubs = @intCast(u32, self.stubs_map.keys().len); - const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); - const allocated_size = self.allocatedSizeLinkedit(dysymtab.indirectsymoff); - const nindirectsyms = nstubs * 2 + ngot_entries; - const needed_size = @intCast(u32, nindirectsyms * @sizeOf(u32)); - - if (needed_size > allocated_size) { - dysymtab.nindirectsyms = 0; - dysymtab.indirectsymoff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, @sizeOf(u32), null)); - } - dysymtab.nindirectsyms = nindirectsyms; - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + needed_size, - }); - - var buf = try self.base.allocator.alloc(u8, needed_size); - defer self.base.allocator.free(buf); - var stream = std.io.fixedBufferStream(buf); - var writer = stream.writer(); - - stubs.reserved1 = 0; - for (self.stubs_map.keys()) |key| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key); - } - - got.reserved1 = nstubs; - for (self.got_entries_map.keys()) |key| { - switch (key.where) { - .undef => { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); - }, - .local => { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); - }, - } - } - - la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; - for (self.stubs_map.keys()) |key| { - try writer.writeIntLittle(u32, dysymtab.iundefsym + key); - } - - try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); - self.load_commands_dirty = true; -} - -fn writeDices(self: *MachO) !void { - if (!self.has_dices) return; - - const tracy = trace(@src()); - defer tracy.end(); - - var buf = std.ArrayList(u8).init(self.base.allocator); - defer buf.deinit(); - - var block: *TextBlock = self.blocks.get(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }) orelse return; - - while (block.prev) |prev| { - block = prev; - } - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_sect = text_seg.sections.items[self.text_section_index.?]; - - while (true) { - if (block.dices.items.len > 0) { - const sym = self.locals.items[block.local_sym_index]; - const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); - - try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); - for (block.dices.items) |dice| { - const rebased_dice = macho.data_in_code_entry{ - .offset = base_off + dice.offset, - .length = dice.length, - .kind = dice.kind, - }; - buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); - } - } - - if (block.next) |next| { - block = next; - } else break; - } - - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; - const allocated_size = self.allocatedSizeLinkedit(dice_cmd.dataoff); - const needed_size = 
@intCast(u32, buf.items.len); - - if (needed_size > allocated_size) { - dice_cmd.datasize = 0; - dice_cmd.dataoff = @intCast(u32, self.findFreeSpaceLinkedit( - needed_size, - @alignOf(macho.data_in_code_entry), - dice_cmd.dataoff, - )); - } - dice_cmd.datasize = needed_size; - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ - dice_cmd.dataoff, - dice_cmd.dataoff + dice_cmd.datasize, - }); - - try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff); - self.load_commands_dirty = true; -} - -fn writeCodeSignaturePadding(self: *MachO) !void { - // TODO figure out how not to rewrite padding every single time. - const tracy = trace(@src()); - defer tracy.end(); - - const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - const fileoff = linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize; - const needed_size = CodeSignature.calcCodeSignaturePaddingSize( - self.base.options.emit.?.sub_path, - fileoff, - self.page_size, - ); - code_sig_cmd.dataoff = @intCast(u32, fileoff); - code_sig_cmd.datasize = needed_size; - - // Advance size of __LINKEDIT segment - linkedit_segment.inner.filesize += needed_size; - if (linkedit_segment.inner.vmsize < linkedit_segment.inner.filesize) { - linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, linkedit_segment.inner.filesize, self.page_size); - } - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. - try self.base.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); - self.load_commands_dirty = true; -} - -fn writeCodeSignature(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - - var code_sig: CodeSignature = .{}; - defer code_sig.deinit(self.base.allocator); - - try code_sig.calcAdhocSignature( - self.base.allocator, - self.base.file.?, - self.base.options.emit.?.sub_path, - text_segment.inner, - code_sig_cmd, - self.base.options.output_mode, - self.page_size, - ); - - var buffer = try self.base.allocator.alloc(u8, code_sig.size()); - defer self.base.allocator.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - try code_sig.write(stream.writer()); - - log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); - - try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); -} - fn writeDyldInfoData(self: *MachO) !void { - if (!self.dyld_info_dirty) return; - const tracy = trace(@src()); defer tracy.end(); @@ -4528,7 +4062,7 @@ fn writeDyldInfoData(self: *MachO) !void { { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
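
Context for the trie code below: an export trie stores each exported symbol keyed by name, with its address encoded relative to the image base, which is why the __TEXT segment's vmaddr is captured as base_address. A worked example with made-up addresses:

    const base_address: u64 = 0x100000000; // hypothetical __TEXT vmaddr
    const sym_n_value: u64 = 0x100001234; // hypothetical exported symbol
    const vmaddr_offset = sym_n_value - base_address; // 0x1234 is what the trie records
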
- log.debug("writing export trie", .{}); + log.debug("generating export trie", .{}); const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const base_address = text_segment.inner.vmaddr; @@ -4546,28 +4080,30 @@ fn writeDyldInfoData(self: *MachO) !void { try trie.finalize(self.base.allocator); } + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const allocated_size = self.allocatedSizeLinkedit(dyld_info.rebase_off); - const rebase_size = @intCast(u32, try bind.rebaseInfoSize(rebase_pointers.items)); - const bind_size = @intCast(u32, try bind.bindInfoSize(bind_pointers.items)); - const lazy_bind_size = @intCast(u32, try bind.lazyBindInfoSize(lazy_bind_pointers.items)); - const export_size = @intCast(u32, trie.size); - const total_size = rebase_size + bind_size + lazy_bind_size + export_size; - const needed_size = mem.alignForwardGeneric(u64, total_size, @alignOf(u64)); + const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); + const bind_size = try bind.bindInfoSize(bind_pointers.items); + const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); + const export_size = trie.size; - if (needed_size > allocated_size) { - dyld_info.rebase_off = 0; - dyld_info.rebase_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, null)); - } + dyld_info.rebase_off = @intCast(u32, seg.inner.fileoff); + dyld_info.rebase_size = @intCast(u32, mem.alignForwardGeneric(u64, rebase_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.rebase_size; - dyld_info.rebase_size = rebase_size; dyld_info.bind_off = dyld_info.rebase_off + dyld_info.rebase_size; - dyld_info.bind_size = bind_size; - dyld_info.lazy_bind_off = dyld_info.bind_off + dyld_info.bind_size; - dyld_info.lazy_bind_size = lazy_bind_size; - dyld_info.export_off = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; - dyld_info.export_size = export_size; + dyld_info.bind_size = @intCast(u32, mem.alignForwardGeneric(u64, bind_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.bind_size; + dyld_info.lazy_bind_off = dyld_info.bind_off + dyld_info.bind_size; + dyld_info.lazy_bind_size = @intCast(u32, mem.alignForwardGeneric(u64, lazy_bind_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.lazy_bind_size; + + dyld_info.export_off = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; + dyld_info.export_size = @intCast(u32, mem.alignForwardGeneric(u64, export_size, @alignOf(u64))); + seg.inner.filesize += dyld_info.export_size; + + const needed_size = dyld_info.rebase_size + dyld_info.bind_size + dyld_info.lazy_bind_size + dyld_info.export_size; var buffer = try self.base.allocator.alloc(u8, needed_size); defer self.base.allocator.free(buffer); mem.set(u8, buffer, 0); @@ -4576,16 +4112,26 @@ fn writeDyldInfoData(self: *MachO) !void { const writer = stream.writer(); try bind.writeRebaseInfo(rebase_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.rebase_size) - @intCast(i64, rebase_size)); + try bind.writeBindInfo(bind_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.bind_size) - @intCast(i64, bind_size)); + try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); + try stream.seekBy(@intCast(i64, dyld_info.lazy_bind_size) - @intCast(i64, lazy_bind_size)); + _ = try trie.write(writer); - log.debug("writing dyld info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + needed_size }); + 
log.debug("writing dyld info from 0x{x} to 0x{x}", .{ + dyld_info.rebase_off, + dyld_info.rebase_off + needed_size, + }); try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); - try self.populateLazyBindOffsetsInStubHelper(buffer[rebase_size + bind_size ..][0..lazy_bind_size]); + try self.populateLazyBindOffsetsInStubHelper( + buffer[dyld_info.rebase_size + dyld_info.bind_size ..][0..dyld_info.lazy_bind_size], + ); self.load_commands_dirty = true; - self.dyld_info_dirty = false; } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { @@ -4661,7 +4207,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const sym = self.locals.items[atom.local_sym_index]; const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset; mem.writeIntLittle(u32, &buf, bind_offset); - log.debug("writing lazy binding offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ + log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ bind_offset, self.getString(sym.n_strx), file_offset, @@ -4674,79 +4220,307 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } -fn writeStringTable(self: *MachO) !void { - if (!self.strtab_dirty) return; +fn writeDices(self: *MachO) !void { + if (!self.has_dices) return; const tracy = trace(@src()); defer tracy.end(); - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const allocated_size = self.allocatedSizeLinkedit(symtab.stroff); - const needed_size = mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64)); + var buf = std.ArrayList(u8).init(self.base.allocator); + defer buf.deinit(); - if (needed_size > allocated_size or self.strtab_needs_relocation) { - symtab.strsize = 0; - symtab.stroff = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1, symtab.symoff)); - self.strtab_needs_relocation = false; + var block: *TextBlock = self.blocks.get(.{ + .seg = self.text_segment_cmd_index orelse return, + .sect = self.text_section_index orelse return, + }) orelse return; + + while (block.prev) |prev| { + block = prev; } - symtab.strsize = @intCast(u32, needed_size); - log.debug("writing string table from 0x{x} to 0x{x}", .{ - symtab.stroff, - symtab.stroff + symtab.strsize, + + const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_sect = text_seg.sections.items[self.text_section_index.?]; + + while (true) { + if (block.dices.items.len > 0) { + const sym = self.locals.items[block.local_sym_index]; + const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); + + try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); + for (block.dices.items) |dice| { + const rebased_dice = macho.data_in_code_entry{ + .offset = base_off + dice.offset, + .length = dice.length, + .kind = dice.kind, + }; + buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); + } + } + + if (block.next) |next| { + block = next; + } else break; + } + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].LinkeditData; + const needed_size = @intCast(u32, buf.items.len); + + dice_cmd.dataoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dice_cmd.datasize = needed_size; + seg.inner.filesize += needed_size; + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ + dice_cmd.dataoff, + dice_cmd.dataoff + 
dice_cmd.datasize, }); - try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); + try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff); self.load_commands_dirty = true; - self.strtab_dirty = false; } -fn updateLinkeditSegmentSizes(self: *MachO) !void { - if (!self.load_commands_dirty) return; - +fn writeSymbolTable(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - // Now, we are in position to update __LINKEDIT segment sizes. - // TODO Add checkpointing so that we don't have to do this every single time. - const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - var final_offset = linkedit_segment.inner.fileoff; + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.symoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); - if (self.dyld_info_cmd_index) |idx| { - const dyld_info = self.load_commands.items[idx].DyldInfoOnly; - final_offset = std.math.max(final_offset, dyld_info.rebase_off + dyld_info.rebase_size); - final_offset = std.math.max(final_offset, dyld_info.bind_off + dyld_info.bind_size); - final_offset = std.math.max(final_offset, dyld_info.weak_bind_off + dyld_info.weak_bind_size); - final_offset = std.math.max(final_offset, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size); - final_offset = std.math.max(final_offset, dyld_info.export_off + dyld_info.export_size); - } - if (self.function_starts_cmd_index) |idx| { - const fstart = self.load_commands.items[idx].LinkeditData; - final_offset = std.math.max(final_offset, fstart.dataoff + fstart.datasize); - } - if (self.data_in_code_cmd_index) |idx| { - const dic = self.load_commands.items[idx].LinkeditData; - final_offset = std.math.max(final_offset, dic.dataoff + dic.datasize); - } - if (self.dysymtab_cmd_index) |idx| { - const dysymtab = self.load_commands.items[idx].Dysymtab; - const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); - final_offset = std.math.max(final_offset, dysymtab.indirectsymoff + nindirectsize); - // TODO Handle more dynamic symbol table sections. 
- } - if (self.symtab_cmd_index) |idx| { - const symtab = self.load_commands.items[idx].Symtab; - const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); - final_offset = std.math.max(final_offset, symtab.symoff + symsize); - final_offset = std.math.max(final_offset, symtab.stroff + symtab.strsize); + var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + defer locals.deinit(); + try locals.appendSlice(self.locals.items); + + if (self.has_stabs) { + for (self.objects.items) |object| { + if (object.debug_info == null) continue; + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_comp_dir.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.tu_name.?), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.makeString(object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime orelse 0, + }); + + for (object.text_blocks.items) |block| { + if (block.stab) |stab| { + const nlists = try stab.asNlists(block.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } else { + for (block.contained.items) |sym_at_off| { + const stab = sym_at_off.stab orelse continue; + const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); + defer self.base.allocator.free(nlists); + try locals.appendSlice(nlists); + } + } + } + + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } } - const filesize = final_offset - linkedit_segment.inner.fileoff; - linkedit_segment.inner.filesize = filesize; - linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, filesize, self.page_size); - try self.base.file.?.pwriteAll(&[_]u8{0}, final_offset); + const nlocals = locals.items.len; + const nexports = self.globals.items.len; + const nundefs = self.undefs.items.len; + + const locals_off = symtab.symoff; + const locals_size = nlocals * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); + + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + + const undefs_off = exports_off + exports_size; + const undefs_size = nundefs * @sizeOf(macho.nlist_64); + log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undefs.items), undefs_off); + + symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs); + seg.inner.filesize += locals_size + exports_size + undefs_size; + + // Update dynamic symbol table. 
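
The dysymtab fields assigned below encode the ordering writeSymbolTable just produced: all locals first, then exported (external defined) symbols, then undefined symbols, each group contiguous. A worked example with made-up counts of 10 locals, 2 exports and 3 undefs:

    // nlocalsym  = 10                   locals  occupy indices [0, 10)
    // iextdefsym = 10, nextdefsym = 2   exports occupy [10, 12)
    // iundefsym  = 12, nundefsym  = 3   undefs  occupy [12, 15)
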
+ const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + dysymtab.nlocalsym = @intCast(u32, nlocals); + dysymtab.iextdefsym = dysymtab.nlocalsym; + dysymtab.nextdefsym = @intCast(u32, nexports); + dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; + dysymtab.nundefsym = @intCast(u32, nundefs); + + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const stubs = &text_segment.sections.items[self.stubs_section_index.?]; + const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + const got = &data_const_segment.sections.items[self.got_section_index.?]; + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?]; + + const nstubs = @intCast(u32, self.stubs_map.keys().len); + const ngot_entries = @intCast(u32, self.got_entries_map.keys().len); + + dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + + const needed_size = dysymtab.nindirectsyms * @sizeOf(u32); + seg.inner.filesize += needed_size; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ + dysymtab.indirectsymoff, + dysymtab.indirectsymoff + needed_size, + }); + + var buf = try self.base.allocator.alloc(u8, needed_size); + defer self.base.allocator.free(buf); + + var stream = std.io.fixedBufferStream(buf); + var writer = stream.writer(); + + stubs.reserved1 = 0; + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); + } + + got.reserved1 = nstubs; + for (self.got_entries_map.keys()) |key| { + switch (key.where) { + .undef => { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key.where_index); + }, + .local => { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + }, + } + } + + la_symbol_ptr.reserved1 = got.reserved1 + ngot_entries; + for (self.stubs_map.keys()) |key| { + try writer.writeIntLittle(u32, dysymtab.iundefsym + key); + } + + try self.base.file.?.pwriteAll(buf, dysymtab.indirectsymoff); self.load_commands_dirty = true; } +fn writeStringTable(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize); + symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64))); + seg.inner.filesize += symtab.strsize; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + + try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); + + if (symtab.strsize > self.strtab.items.len) { + // This is potentially the last section, so we need to pad it out. 
+ try self.base.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1); + } + self.load_commands_dirty = true; +} + +fn writeLinkeditSegment(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + seg.inner.filesize = 0; + + try self.writeDyldInfoData(); + try self.writeDices(); + try self.writeSymbolTable(); + try self.writeStringTable(); + + seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); +} + +fn writeCodeSignaturePadding(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; + const fileoff = linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize; + const needed_size = CodeSignature.calcCodeSignaturePaddingSize( + self.base.options.emit.?.sub_path, + fileoff, + self.page_size, + ); + code_sig_cmd.dataoff = @intCast(u32, fileoff); + code_sig_cmd.datasize = needed_size; + + // Advance size of __LINKEDIT segment + linkedit_segment.inner.filesize += needed_size; + if (linkedit_segment.inner.vmsize < linkedit_segment.inner.filesize) { + linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, linkedit_segment.inner.filesize, self.page_size); + } + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); + // Pad out the space. We need to do this to calculate valid hashes for everything in the file + // except for code signature data. + try self.base.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); + self.load_commands_dirty = true; +} + +fn writeCodeSignature(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; + + var code_sig: CodeSignature = .{}; + defer code_sig.deinit(self.base.allocator); + + try code_sig.calcAdhocSignature( + self.base.allocator, + self.base.file.?, + self.base.options.emit.?.sub_path, + text_segment.inner, + code_sig_cmd, + self.base.options.output_mode, + self.page_size, + ); + + var buffer = try self.base.allocator.alloc(u8, code_sig.size()); + defer self.base.allocator.free(buffer); + var stream = std.io.fixedBufferStream(buffer); + try code_sig.write(stream.writer()); + + log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); + + try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); +} + /// Writes all load commands and section headers. 
fn writeLoadCommands(self: *MachO) !void { if (!self.load_commands_dirty) return; diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index e6ceed9c55..160ba5cd8c 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -844,7 +844,6 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R .sect = context.macho_file.got_section_index.?, }; _ = try context.macho_file.allocateAtom(atom, match); - context.macho_file.dyld_info_dirty = true; } else if (parsed_rel.payload == .unsigned) { switch (parsed_rel.where) { .undef => { From d92b5416e86f2b6bf0cf530c8f98da572bf9efde Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 5 Sep 2021 00:19:45 +0200 Subject: [PATCH 51/78] macho: add a small routine to update section ordinals at the end The actual ordinals may change when adding new sections to the segments. --- src/link/MachO.zig | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4ee81e7c43..ce6fe49d35 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -162,6 +162,7 @@ stubs_map: std.AutoArrayHashMapUnmanaged(u32, *TextBlock) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, load_commands_dirty: bool = false, +sections_order_dirty: bool = false, has_dices: bool = false, has_stabs: bool = false, @@ -806,6 +807,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { defer tracy.end(); try self.setEntryPoint(); + try self.updateSectionOrdinals(); try self.writeLinkeditSegment(); if (self.d_sym) |*ds| { @@ -3817,6 +3819,7 @@ fn allocateSection( try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); self.load_commands_dirty = true; + self.sections_order_dirty = true; return index; } @@ -3998,6 +4001,44 @@ fn nextSegmentAddressAndOffset(self: *MachO) NextSegmentAddressAndOffset { }; } +fn updateSectionOrdinals(self: *MachO) !void { + if (!self.sections_order_dirty) return; + + const tracy = trace(@src()); + defer tracy.end(); + + var ordinal_remap = std.AutoHashMap(u8, u8).init(self.base.allocator); + defer ordinal_remap.deinit(); + var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; + + var new_ordinal: u8 = 0; + for (self.load_commands.items) |lc, lc_id| { + if (lc != .Segment) break; + + for (lc.Segment.sections.items) |_, sect_id| { + const match = MatchingSection{ + .seg = @intCast(u16, lc_id), + .sect = @intCast(u16, sect_id), + }; + const old_ordinal = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + new_ordinal += 1; + try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); + try ordinals.putNoClobber(self.base.allocator, match, {}); + } + } + + for (self.locals.items) |*sym| { + if (sym.n_sect == 0) continue; + sym.n_sect = ordinal_remap.get(sym.n_sect).?; + } + for (self.globals.items) |*sym| { + sym.n_sect = ordinal_remap.get(sym.n_sect).?; + } + + self.section_ordinals.deinit(self.base.allocator); + self.section_ordinals = ordinals; +} + fn writeDyldInfoData(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); From ea8808f87b319c92dd74d75f1bb4fae1180e00ed Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 5 Sep 2021 11:15:48 +0200 Subject: [PATCH 52/78] macho: add logic for segment expansion --- src/link/MachO.zig | 191 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 171 insertions(+), 20 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ce6fe49d35..214c4f67f0 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -133,8 +133,8 @@ objc_selrefs_section_index: ?u16 = null, objc_classrefs_section_index: ?u16 = null, objc_data_section_index: ?u16 = null, -bss_file_offset: ?u64 = null, -tlv_bss_file_offset: ?u64 = null, +bss_file_offset: ?u32 = null, +tlv_bss_file_offset: ?u32 = null, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, @@ -1693,6 +1693,84 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const padding: ?u64 = if (match.seg == self.text_segment_cmd_index.?) self.header_pad else null; const atom_alignment = try math.powi(u64, 2, atom.alignment); const new_offset = @intCast(u32, seg.findFreeSpace(needed_size, atom_alignment, padding)); + + if (new_offset + needed_size >= seg.inner.fileoff + seg.inner.filesize) { + // Bummer, need to move all segments below down... + // TODO is this the right estimate? + const new_seg_size = mem.alignForwardGeneric( + u64, + padToIdeal(seg.inner.filesize + needed_size), + self.page_size, + ); + // TODO actually, we're always required to move in a number of pages so I guess all we need + // to know here is the number of pages to shift downwards. + const offset_amt = @intCast(u32, @intCast(i64, new_seg_size) - @intCast(i64, seg.inner.filesize)); + seg.inner.filesize = new_seg_size; + seg.inner.vmsize = new_seg_size; + log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + seg.inner.segname, + seg.inner.fileoff, + seg.inner.fileoff + seg.inner.filesize, + seg.inner.vmaddr, + seg.inner.vmaddr + seg.inner.vmsize, + }); + // TODO We should probably nop the expanded by distance, or put 0s. + + // TODO copyRangeAll doesn't automatically extend the file on macOS. + const ledit_seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const new_filesize = offset_amt + ledit_seg.inner.fileoff + ledit_seg.inner.filesize; + try self.base.file.?.pwriteAll(&[_]u8{0}, new_filesize - 1); + + var next: usize = match.seg + 1; + while (next < self.linkedit_segment_cmd_index.? 
+ 1) : (next += 1) { + const next_seg = &self.load_commands.items[next].Segment; + _ = try self.base.file.?.copyRangeAll( + next_seg.inner.fileoff, + self.base.file.?, + next_seg.inner.fileoff + offset_amt, + next_seg.inner.filesize, + ); + next_seg.inner.fileoff += offset_amt; + next_seg.inner.vmaddr += offset_amt; + log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + next_seg.inner.segname, + next_seg.inner.fileoff, + next_seg.inner.fileoff + next_seg.inner.filesize, + next_seg.inner.vmaddr, + next_seg.inner.vmaddr + next_seg.inner.vmsize, + }); + + for (next_seg.sections.items) |*moved_sect, moved_sect_id| { + // TODO put below snippet in a function. + const moved_sect_offset = blk: { + if (self.data_segment_cmd_index.? == next) { + if (self.bss_section_index) |idx| { + if (idx == moved_sect_id) break :blk &self.bss_file_offset.?; + } + if (self.tlv_bss_section_index) |idx| { + if (idx == moved_sect_id) break :blk &self.tlv_bss_file_offset.?; + } + } + break :blk &moved_sect.offset; + }; + moved_sect_offset.* += offset_amt; + moved_sect.addr += offset_amt; + log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + commands.segmentName(moved_sect.*), + commands.sectionName(moved_sect.*), + moved_sect_offset.*, + moved_sect_offset.* + moved_sect.size, + moved_sect.addr, + moved_sect.addr + moved_sect.size, + }); + + try self.allocateLocalSymbols(.{ + .seg = @intCast(u16, next), + .sect = @intCast(u16, moved_sect_id), + }, offset_amt); + } + } + } sect.offset = new_offset; sect.addr = seg.inner.vmaddr + sect.offset - seg.inner.fileoff; log.debug(" (found new {s},{s} free space from 0x{x} to 0x{x})", .{ @@ -1701,11 +1779,9 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 new_offset, new_offset + needed_size, }); - try self.allocateLocalSymbols(match, old_base_addr); - vaddr = @intCast( - u64, - @intCast(i64, vaddr) + @intCast(i64, sect.addr) - @intCast(i64, old_base_addr), - ); + const offset_amt = @intCast(i64, sect.addr) - @intCast(i64, old_base_addr); + try self.allocateLocalSymbols(match, offset_amt); + vaddr = @intCast(u64, @intCast(i64, vaddr) + offset_amt); } sect.size = needed_size; @@ -1761,11 +1837,8 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { try self.base.file.?.pwriteAll(atom.code.items, file_offset); } -fn allocateLocalSymbols(self: *MachO, match: MatchingSection, old_base_addr: u64) !void { +fn allocateLocalSymbols(self: *MachO, match: MatchingSection, offset: i64) !void { var atom = self.blocks.get(match) orelse return; - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const offset = @intCast(i64, sect.addr) - @intCast(i64, old_base_addr); while (true) { const atom_sym = &self.locals.items[atom.local_sym_index]; @@ -3317,9 +3390,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.text_segment_cmd_index == null) { self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); const program_code_size_hint = self.base.options.program_code_size_hint; - // const program_code_size_hint = 10; const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; - const ideal_size = self.header_pad + (program_code_size_hint + got_size_hint) * 5; + const ideal_size = self.header_pad + program_code_size_hint + got_size_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __TEXT segment 
free space 0x{x} to 0x{x}", .{ 0, needed_size }); @@ -3413,7 +3485,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.data_const_segment_cmd_index == null) { self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); const address_and_offset = self.nextSegmentAddressAndOffset(); - const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint * 1000; + const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ @@ -3454,7 +3526,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.data_segment_cmd_index == null) { self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); const address_and_offset = self.nextSegmentAddressAndOffset(); - const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint * 1000; + const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.page_size); log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); @@ -3905,20 +3977,99 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const old_base_addr = text_section.addr; text_section.size = 0; const new_offset = @intCast(u32, text_segment.findFreeSpace(needed_size, alignment, self.header_pad)); + + if (new_offset + needed_size >= text_segment.inner.fileoff + text_segment.inner.filesize) { + // Bummer, need to move all segments below down... + // TODO is this the right estimate? + const new_seg_size = mem.alignForwardGeneric( + u64, + padToIdeal(text_segment.inner.filesize + needed_size), + self.page_size, + ); + // TODO actually, we're always required to move in a number of pages so I guess all we need + // to know here is the number of pages to shift downwards. + const offset_amt = @intCast( + u32, + @intCast(i64, new_seg_size) - @intCast(i64, text_segment.inner.filesize), + ); + text_segment.inner.filesize = new_seg_size; + text_segment.inner.vmsize = new_seg_size; + log.debug(" (new __TEXT segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + text_segment.inner.fileoff, + text_segment.inner.fileoff + text_segment.inner.filesize, + text_segment.inner.vmaddr, + text_segment.inner.vmaddr + text_segment.inner.vmsize, + }); + // TODO We should probably nop the expanded by distance, or put 0s. + + // TODO copyRangeAll doesn't automatically extend the file on macOS. + const ledit_seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const new_filesize = offset_amt + ledit_seg.inner.fileoff + ledit_seg.inner.filesize; + try self.base.file.?.pwriteAll(&[_]u8{0}, new_filesize - 1); + + var next: usize = match.seg + 1; + while (next < self.linkedit_segment_cmd_index.? 
+ 1) : (next += 1) { + const next_seg = &self.load_commands.items[next].Segment; + _ = try self.base.file.?.copyRangeAll( + next_seg.inner.fileoff, + self.base.file.?, + next_seg.inner.fileoff + offset_amt, + next_seg.inner.filesize, + ); + next_seg.inner.fileoff += offset_amt; + next_seg.inner.vmaddr += offset_amt; + log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + next_seg.inner.segname, + next_seg.inner.fileoff, + next_seg.inner.fileoff + next_seg.inner.filesize, + next_seg.inner.vmaddr, + next_seg.inner.vmaddr + next_seg.inner.vmsize, + }); + + for (next_seg.sections.items) |*moved_sect, moved_sect_id| { + // TODO put below snippet in a function. + const moved_sect_offset = blk: { + if (self.data_segment_cmd_index.? == next) { + if (self.bss_section_index) |idx| { + if (idx == moved_sect_id) break :blk &self.bss_file_offset.?; + } + if (self.tlv_bss_section_index) |idx| { + if (idx == moved_sect_id) break :blk &self.tlv_bss_file_offset.?; + } + } + break :blk &moved_sect.offset; + }; + moved_sect_offset.* += offset_amt; + moved_sect.addr += offset_amt; + log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + commands.segmentName(moved_sect.*), + commands.sectionName(moved_sect.*), + moved_sect_offset.*, + moved_sect_offset.* + moved_sect.size, + moved_sect.addr, + moved_sect.addr + moved_sect.size, + }); + + try self.allocateLocalSymbols(.{ + .seg = @intCast(u16, next), + .sect = @intCast(u16, moved_sect_id), + }, offset_amt); + } + } + } + text_section.offset = new_offset; text_section.addr = text_segment.inner.vmaddr + text_section.offset - text_segment.inner.fileoff; log.debug(" (found new __TEXT,__text free space from 0x{x} to 0x{x})", .{ new_offset, new_offset + needed_size, }); + const offset_amt = @intCast(i64, text_section.addr) - @intCast(i64, old_base_addr); try self.allocateLocalSymbols(.{ .seg = self.text_segment_cmd_index.?, .sect = self.text_section_index.?, - }, old_base_addr); - vaddr = @intCast( - u64, - @intCast(i64, vaddr) + @intCast(i64, text_section.addr) - @intCast(i64, old_base_addr), - ); + }, offset_amt); + vaddr = @intCast(u64, @intCast(i64, vaddr) + offset_amt); } text_section.size = needed_size; From 61dca19107a30011ba67754ff867c858024eb5c5 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sun, 5 Sep 2021 23:30:06 +0200 Subject: [PATCH 53/78] macho: encaps logic for extract sect offset into fn --- src/link/MachO.zig | 100 ++++++++++++++++++--------------------------- 1 file changed, 40 insertions(+), 60 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 214c4f67f0..40c2c0dd83 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1667,18 +1667,8 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { const needed_size = (vaddr + atom.size) - sect.addr; - const sect_offset: u64 = blk: { - if (self.data_segment_cmd_index.? 
== match.seg) { - if (self.bss_section_index) |idx| { - if (idx == match.sect) break :blk self.bss_file_offset.?; - } - if (self.tlv_bss_section_index) |idx| { - if (idx == match.sect) break :blk self.tlv_bss_file_offset.?; - } - } - break :blk sect.offset; - }; - const file_offset = sect_offset + vaddr - sect.addr; + const sect_offset = self.getPtrToSectionOffset(match); + const file_offset = sect_offset.* + vaddr - sect.addr; const max_size = seg.allocatedSize(file_offset); log.debug(" (section {s},{s} needed size 0x{x}, max available size 0x{x})", .{ commands.segmentName(sect.*), @@ -1741,38 +1731,28 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 }); for (next_seg.sections.items) |*moved_sect, moved_sect_id| { - // TODO put below snippet in a function. - const moved_sect_offset = blk: { - if (self.data_segment_cmd_index.? == next) { - if (self.bss_section_index) |idx| { - if (idx == moved_sect_id) break :blk &self.bss_file_offset.?; - } - if (self.tlv_bss_section_index) |idx| { - if (idx == moved_sect_id) break :blk &self.tlv_bss_file_offset.?; - } - } - break :blk &moved_sect.offset; + const moved_match = MatchingSection{ + .seg = @intCast(u16, next), + .sect = @intCast(u16, moved_sect_id), }; - moved_sect_offset.* += offset_amt; + const ptr_sect_offset = self.getPtrToSectionOffset(moved_match); + ptr_sect_offset.* += offset_amt; moved_sect.addr += offset_amt; log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ commands.segmentName(moved_sect.*), commands.sectionName(moved_sect.*), - moved_sect_offset.*, - moved_sect_offset.* + moved_sect.size, + ptr_sect_offset.*, + ptr_sect_offset.* + moved_sect.size, moved_sect.addr, moved_sect.addr + moved_sect.size, }); - try self.allocateLocalSymbols(.{ - .seg = @intCast(u16, next), - .sect = @intCast(u16, moved_sect_id), - }, offset_amt); + try self.allocateLocalSymbols(moved_match, offset_amt); } } } - sect.offset = new_offset; - sect.addr = seg.inner.vmaddr + sect.offset - seg.inner.fileoff; + sect_offset.* = new_offset; + sect.addr = seg.inner.vmaddr + sect_offset.* - seg.inner.fileoff; log.debug(" (found new {s},{s} free space from 0x{x} to 0x{x})", .{ commands.segmentName(sect.*), commands.sectionName(sect.*), @@ -1820,17 +1800,7 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; const sym = self.locals.items[atom.local_sym_index]; - const sect_offset: u64 = blk: { - if (self.data_segment_cmd_index.? == match.seg) { - if (self.bss_section_index) |idx| { - if (idx == match.sect) break :blk self.bss_file_offset.?; - } - if (self.tlv_bss_section_index) |idx| { - if (idx == match.sect) break :blk self.tlv_bss_file_offset.?; - } - } - break :blk sect.offset; - }; + const sect_offset = self.getPtrToSectionOffset(match).*; const file_offset = sect_offset + sym.n_value - sect.addr; try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); @@ -3896,6 +3866,25 @@ fn allocateSection( return index; } +fn getPtrToSectionOffset(self: *MachO, match: MatchingSection) *u32 { + if (self.data_segment_cmd_index.? 
== match.seg) { + if (self.bss_section_index) |idx| { + if (idx == match.sect) { + return &self.bss_file_offset.?; + } + } + if (self.tlv_bss_section_index) |idx| { + if (idx == match.sect) { + return &self.tlv_bss_file_offset.?; + } + } + } + + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; + return §.offset; +} + fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_section = &text_segment.sections.items[self.text_section_index.?]; @@ -4028,32 +4017,23 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, for (next_seg.sections.items) |*moved_sect, moved_sect_id| { // TODO put below snippet in a function. - const moved_sect_offset = blk: { - if (self.data_segment_cmd_index.? == next) { - if (self.bss_section_index) |idx| { - if (idx == moved_sect_id) break :blk &self.bss_file_offset.?; - } - if (self.tlv_bss_section_index) |idx| { - if (idx == moved_sect_id) break :blk &self.tlv_bss_file_offset.?; - } - } - break :blk &moved_sect.offset; + const moved_match = MatchingSection{ + .seg = @intCast(u16, next), + .sect = @intCast(u16, moved_sect_id), }; - moved_sect_offset.* += offset_amt; + const ptr_sect_offset = self.getPtrToSectionOffset(moved_match); + ptr_sect_offset.* += offset_amt; moved_sect.addr += offset_amt; log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ commands.segmentName(moved_sect.*), commands.sectionName(moved_sect.*), - moved_sect_offset.*, - moved_sect_offset.* + moved_sect.size, + ptr_sect_offset.*, + ptr_sect_offset.* + moved_sect.size, moved_sect.addr, moved_sect.addr + moved_sect.size, }); - try self.allocateLocalSymbols(.{ - .seg = @intCast(u16, next), - .sect = @intCast(u16, moved_sect_id), - }, offset_amt); + try self.allocateLocalSymbols(moved_match, offset_amt); } } } From 5e64d9745ba54d4dd61f8f98be4a3b7e6f2d8205 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 6 Sep 2021 10:38:51 +0200 Subject: [PATCH 54/78] macho: fix noninclusion of data-in-code Also, calculate non-extern, section offset based addends for SIGNED and UNSIGNED relocations on x86_64 upfront as an offset wrt to the target symbol representing position of the section/atom within the final artifact. --- src/link/MachO/Object.zig | 6 +++--- src/link/MachO/TextBlock.zig | 17 +++++++---------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 72dbb05de9..a3f0c56065 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -525,7 +525,7 @@ pub fn parseTextBlocks( break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; }; - macho_file.has_dices = blk: { + macho_file.has_dices = macho_file.has_dices or blk: { if (self.text_section_index) |index| { if (index != id) break :blk false; if (self.data_in_code_entries.items.len == 0) break :blk false; @@ -558,7 +558,7 @@ pub fn parseTextBlocks( .n_type = macho.N_SECT, .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), .n_desc = 0, - .n_value = sect.addr, + .n_value = 0, }); try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index); break :blk block_local_sym_index; @@ -660,7 +660,7 @@ pub fn parseTextBlocks( .n_type = macho.N_SECT, .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? 
+ 1), .n_desc = 0, - .n_value = sect.addr, + .n_value = 0, }); try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index); break :blk block_local_sym_index; diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 160ba5cd8c..3517e532a0 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -646,7 +646,7 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc .n_type = macho.N_SECT, .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1), .n_desc = 0, - .n_value = sect.addr, + .n_value = 0, }); try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index); break :blk local_sym_index; @@ -956,9 +956,9 @@ fn parseUnsigned( mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); if (rel.r_extern == 0) { - assert(out.where == .local); - const target_sym = context.macho_file.locals.items[out.where_index]; - addend -= @intCast(i64, target_sym.n_value); + const source_seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const source_sect_base_addr = source_seg.sections.items[rel.r_symbolnum - 1].addr; + addend -= @intCast(i64, source_sect_base_addr); } out.payload = .{ @@ -1053,12 +1053,9 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; if (rel.r_extern == 0) { - const source_sym = context.macho_file.locals.items[self.local_sym_index]; - const target_sym = switch (out.where) { - .local => context.macho_file.locals.items[out.where_index], - .undef => context.macho_file.undefs.items[out.where_index], - }; - addend = @intCast(i64, source_sym.n_value + out.offset + 4) + addend - @intCast(i64, target_sym.n_value); + const source_seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const source_sect_base_addr = source_seg.sections.items[rel.r_symbolnum - 1].addr; + addend = @intCast(i64, out.offset) + addend - @intCast(i64, source_sect_base_addr) + 4 + correction; } out.payload = .{ From 2914ea9e3388da2bf0240b9bd6b0474f9686322c Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 6 Sep 2021 11:17:20 +0200 Subject: [PATCH 55/78] macho: require atom padding for machine code only --- src/link/MachO.zig | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 40c2c0dd83..e0396dc6da 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1632,22 +1632,7 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const sect = &seg.sections.items[match.sect]; const sym = &self.locals.items[atom.local_sym_index]; - // Padding is not required for pointer-type sections and any synthetic sections such as - // stubs or stub_helper. - // TODO audit this. - const needs_padding = switch (commands.sectionType(sect.*)) { - macho.S_SYMBOL_STUBS, - macho.S_NON_LAZY_SYMBOL_POINTERS, - macho.S_LAZY_SYMBOL_POINTERS, - macho.S_LITERAL_POINTERS, - macho.S_THREAD_LOCAL_VARIABLES, - => false, - else => blk: { - if (match.seg == self.text_segment_cmd_index.? and - match.sect == self.stub_helper_section_index.?) break :blk false; - break :blk true; - }, - }; + const needs_padding = match.seg == self.text_segment_cmd_index.? 
and match.sect == self.text_section_index.?; var atom_placement: ?*TextBlock = null; From 29d2e19c3ea50c9caec036b52e978297a04bb969 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 6 Sep 2021 16:46:48 +0200 Subject: [PATCH 56/78] macho: allocate sections one after the other and grow if needed --- src/link/MachO.zig | 372 +++++++++++++++++++++------------------------ 1 file changed, 171 insertions(+), 201 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index e0396dc6da..7c7995a5cc 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1647,110 +1647,12 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 break :blk new_start_vaddr; } else sect.addr; - log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); + log.warn("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - const needed_size = (vaddr + atom.size) - sect.addr; - const sect_offset = self.getPtrToSectionOffset(match); - const file_offset = sect_offset.* + vaddr - sect.addr; - const max_size = seg.allocatedSize(file_offset); - log.debug(" (section {s},{s} needed size 0x{x}, max available size 0x{x})", .{ - commands.segmentName(sect.*), - commands.sectionName(sect.*), - needed_size, - max_size, - }); - - if (needed_size > max_size) { - const old_base_addr = sect.addr; - sect.size = 0; - const padding: ?u64 = if (match.seg == self.text_segment_cmd_index.?) self.header_pad else null; - const atom_alignment = try math.powi(u64, 2, atom.alignment); - const new_offset = @intCast(u32, seg.findFreeSpace(needed_size, atom_alignment, padding)); - - if (new_offset + needed_size >= seg.inner.fileoff + seg.inner.filesize) { - // Bummer, need to move all segments below down... - // TODO is this the right estimate? - const new_seg_size = mem.alignForwardGeneric( - u64, - padToIdeal(seg.inner.filesize + needed_size), - self.page_size, - ); - // TODO actually, we're always required to move in a number of pages so I guess all we need - // to know here is the number of pages to shift downwards. - const offset_amt = @intCast(u32, @intCast(i64, new_seg_size) - @intCast(i64, seg.inner.filesize)); - seg.inner.filesize = new_seg_size; - seg.inner.vmsize = new_seg_size; - log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - seg.inner.segname, - seg.inner.fileoff, - seg.inner.fileoff + seg.inner.filesize, - seg.inner.vmaddr, - seg.inner.vmaddr + seg.inner.vmsize, - }); - // TODO We should probably nop the expanded by distance, or put 0s. - - // TODO copyRangeAll doesn't automatically extend the file on macOS. - const ledit_seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const new_filesize = offset_amt + ledit_seg.inner.fileoff + ledit_seg.inner.filesize; - try self.base.file.?.pwriteAll(&[_]u8{0}, new_filesize - 1); - - var next: usize = match.seg + 1; - while (next < self.linkedit_segment_cmd_index.? 
+ 1) : (next += 1) { - const next_seg = &self.load_commands.items[next].Segment; - _ = try self.base.file.?.copyRangeAll( - next_seg.inner.fileoff, - self.base.file.?, - next_seg.inner.fileoff + offset_amt, - next_seg.inner.filesize, - ); - next_seg.inner.fileoff += offset_amt; - next_seg.inner.vmaddr += offset_amt; - log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - next_seg.inner.segname, - next_seg.inner.fileoff, - next_seg.inner.fileoff + next_seg.inner.filesize, - next_seg.inner.vmaddr, - next_seg.inner.vmaddr + next_seg.inner.vmsize, - }); - - for (next_seg.sections.items) |*moved_sect, moved_sect_id| { - const moved_match = MatchingSection{ - .seg = @intCast(u16, next), - .sect = @intCast(u16, moved_sect_id), - }; - const ptr_sect_offset = self.getPtrToSectionOffset(moved_match); - ptr_sect_offset.* += offset_amt; - moved_sect.addr += offset_amt; - log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - commands.segmentName(moved_sect.*), - commands.sectionName(moved_sect.*), - ptr_sect_offset.*, - ptr_sect_offset.* + moved_sect.size, - moved_sect.addr, - moved_sect.addr + moved_sect.size, - }); - - try self.allocateLocalSymbols(moved_match, offset_amt); - } - } - } - sect_offset.* = new_offset; - sect.addr = seg.inner.vmaddr + sect_offset.* - seg.inner.fileoff; - log.debug(" (found new {s},{s} free space from 0x{x} to 0x{x})", .{ - commands.segmentName(sect.*), - commands.sectionName(sect.*), - new_offset, - new_offset + needed_size, - }); - const offset_amt = @intCast(i64, sect.addr) - @intCast(i64, old_base_addr); - try self.allocateLocalSymbols(match, offset_amt); - vaddr = @intCast(u64, @intCast(i64, vaddr) + offset_amt); - } - - sect.size = needed_size; - self.load_commands_dirty = true; + const needed_size = @intCast(u32, (vaddr + atom.size) - sect.addr); + try self.growSection(match, needed_size); } const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); sym.n_value = vaddr; @@ -3373,7 +3275,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { else => unreachable, // unhandled architecture type }; const needed_size = self.base.options.program_code_size_hint; - // const needed_size = 10; self.text_section_index = try self.allocateSection( self.text_segment_cmd_index.?, "__text", @@ -3821,9 +3722,9 @@ fn allocateSection( const alignment_pow_2 = try math.powi(u32, 2, alignment); const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) 
self.header_pad else null; - const off = seg.findFreeSpace(size, alignment_pow_2, padding); + const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); - log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ + log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ commands.segmentName(sect), commands.sectionName(sect), off, @@ -3851,6 +3752,170 @@ fn allocateSection( return index; } +fn findFreeSpace(self: MachO, segment_id: u16, alignment: u64, start: ?u64) u64 { + const seg = self.load_commands.items[segment_id].Segment; + if (seg.sections.items.len == 0) { + return if (start) |v| v else seg.inner.fileoff; + } + const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const final_off = last_sect.offset + padToIdeal(last_sect.size); + return mem.alignForwardGeneric(u64, final_off, alignment); +} + +fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; + + const alignment = try math.powi(u32, 2, sect.@"align"); + const sect_offset = self.getPtrToSectionOffset(match); + const max_size = self.allocatedSize(match.seg, sect_offset.*); + const ideal_size = padToIdeal(new_size); + const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); + + if (needed_size > max_size) blk: { + // Need to move all sections below in file and address spaces. + const offset_amt = offset: { + const max_alignment = try self.getSectionMaxAlignment(match.seg, match.sect + 1); + break :offset mem.alignForwardGeneric(u64, needed_size - max_size, max_alignment); + }; + + // Before we commit to this, check if the segment needs to grow too. + // We assume that each section header is growing linearly with the increasing + // file offset / virtual memory address space. + const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const last_sect_off = last_sect.offset + last_sect.size; + const seg_off = seg.inner.fileoff + seg.inner.filesize; + + if (last_sect_off + offset_amt > seg_off) { + // Need to grow segment first. + log.warn(" (need to grow segment first)", .{}); + const spill_size = (last_sect_off + offset_amt) - seg_off; + const seg_offset_amt = mem.alignForwardGeneric(u64, spill_size, self.page_size); + seg.inner.filesize += seg_offset_amt; + seg.inner.vmsize += seg_offset_amt; + + log.warn(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + seg.inner.segname, + seg.inner.fileoff, + seg.inner.fileoff + seg.inner.filesize, + seg.inner.vmaddr, + seg.inner.vmaddr + seg.inner.vmsize, + }); + + // TODO We should probably nop the expanded by distance, or put 0s. + + // TODO copyRangeAll doesn't automatically extend the file on macOS. + const ledit_seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const new_filesize = seg_offset_amt + ledit_seg.inner.fileoff + ledit_seg.inner.filesize; + try self.base.file.?.pwriteAll(&[_]u8{0}, new_filesize - 1); + + var next: usize = match.seg + 1; + while (next < self.linkedit_segment_cmd_index.? 
+ 1) : (next += 1) { + const next_seg = &self.load_commands.items[next].Segment; + _ = try self.base.file.?.copyRangeAll( + next_seg.inner.fileoff, + self.base.file.?, + next_seg.inner.fileoff + seg_offset_amt, + next_seg.inner.filesize, + ); + next_seg.inner.fileoff += seg_offset_amt; + next_seg.inner.vmaddr += seg_offset_amt; + + log.warn(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + next_seg.inner.segname, + next_seg.inner.fileoff, + next_seg.inner.fileoff + next_seg.inner.filesize, + next_seg.inner.vmaddr, + next_seg.inner.vmaddr + next_seg.inner.vmsize, + }); + + for (next_seg.sections.items) |*moved_sect, moved_sect_id| { + const moved_match = MatchingSection{ + .seg = @intCast(u16, next), + .sect = @intCast(u16, moved_sect_id), + }; + const ptr_sect_offset = self.getPtrToSectionOffset(moved_match); + ptr_sect_offset.* += @intCast(u32, seg_offset_amt); + moved_sect.addr += seg_offset_amt; + + log.warn(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + commands.segmentName(moved_sect.*), + commands.sectionName(moved_sect.*), + ptr_sect_offset.*, + ptr_sect_offset.* + moved_sect.size, + moved_sect.addr, + moved_sect.addr + moved_sect.size, + }); + + try self.allocateLocalSymbols(moved_match, @intCast(i64, seg_offset_amt)); + } + } + } + + if (match.sect + 1 >= seg.sections.items.len) break :blk; + + // We have enough space to expand within the segment, so move all sections by + // the required amount and update their header offsets. + const next_sect = seg.sections.items[match.sect + 1]; + const total_size = last_sect_off - next_sect.offset; + _ = try self.base.file.?.copyRangeAll( + next_sect.offset, + self.base.file.?, + next_sect.offset + offset_amt, + total_size, + ); + + var next = match.sect + 1; + while (next < seg.sections.items.len) : (next += 1) { + const moved_match = MatchingSection{ + .seg = match.seg, + .sect = next, + }; + const moved_sect = &seg.sections.items[next]; + const ptr_sect_offset = self.getPtrToSectionOffset(moved_match); + ptr_sect_offset.* += @intCast(u32, offset_amt); + moved_sect.addr += offset_amt; + + log.warn(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ + commands.segmentName(moved_sect.*), + commands.sectionName(moved_sect.*), + ptr_sect_offset.*, + ptr_sect_offset.* + moved_sect.size, + moved_sect.addr, + moved_sect.addr + moved_sect.size, + }); + + try self.allocateLocalSymbols(moved_match, @intCast(i64, offset_amt)); + } + } + + sect.size = new_size; + self.load_commands_dirty = true; +} + +fn allocatedSize(self: MachO, segment_id: u16, start: u64) u64 { + const seg = self.load_commands.items[segment_id].Segment; + assert(start >= seg.inner.fileoff); + var min_pos: u64 = seg.inner.fileoff + seg.inner.filesize; + for (seg.sections.items) |section| { + if (section.offset <= start) continue; + if (section.offset < min_pos) min_pos = section.offset; + } + return min_pos - start; +} + +fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u32 { + const seg = self.load_commands.items[segment_id].Segment; + var max_alignment: u32 = 1; + var next = start_sect_id; + while (next < seg.sections.items.len) : (next += 1) { + const sect = seg.sections.items[next]; + const alignment = try math.powi(u32, 2, sect.@"align"); + max_alignment = math.max(max_alignment, alignment); + } + return max_alignment; +} + fn getPtrToSectionOffset(self: *MachO, match: MatchingSection) *u32 { if (self.data_segment_cmd_index.? 
== match.seg) { if (self.bss_section_index) |idx| { @@ -3943,103 +4008,8 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const expand_text_section = block_placement == null or block_placement.?.next == null; if (expand_text_section) { - const needed_size = (vaddr + new_block_size) - text_section.addr; - const max_size = text_segment.allocatedSize(vaddr - pagezero_vmsize); - log.debug(" (section __TEXT,__text needed size 0x{x}, max available size 0x{x})", .{ needed_size, max_size }); - - if (needed_size > max_size) { - const old_base_addr = text_section.addr; - text_section.size = 0; - const new_offset = @intCast(u32, text_segment.findFreeSpace(needed_size, alignment, self.header_pad)); - - if (new_offset + needed_size >= text_segment.inner.fileoff + text_segment.inner.filesize) { - // Bummer, need to move all segments below down... - // TODO is this the right estimate? - const new_seg_size = mem.alignForwardGeneric( - u64, - padToIdeal(text_segment.inner.filesize + needed_size), - self.page_size, - ); - // TODO actually, we're always required to move in a number of pages so I guess all we need - // to know here is the number of pages to shift downwards. - const offset_amt = @intCast( - u32, - @intCast(i64, new_seg_size) - @intCast(i64, text_segment.inner.filesize), - ); - text_segment.inner.filesize = new_seg_size; - text_segment.inner.vmsize = new_seg_size; - log.debug(" (new __TEXT segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - text_segment.inner.fileoff, - text_segment.inner.fileoff + text_segment.inner.filesize, - text_segment.inner.vmaddr, - text_segment.inner.vmaddr + text_segment.inner.vmsize, - }); - // TODO We should probably nop the expanded by distance, or put 0s. - - // TODO copyRangeAll doesn't automatically extend the file on macOS. - const ledit_seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - const new_filesize = offset_amt + ledit_seg.inner.fileoff + ledit_seg.inner.filesize; - try self.base.file.?.pwriteAll(&[_]u8{0}, new_filesize - 1); - - var next: usize = match.seg + 1; - while (next < self.linkedit_segment_cmd_index.? + 1) : (next += 1) { - const next_seg = &self.load_commands.items[next].Segment; - _ = try self.base.file.?.copyRangeAll( - next_seg.inner.fileoff, - self.base.file.?, - next_seg.inner.fileoff + offset_amt, - next_seg.inner.filesize, - ); - next_seg.inner.fileoff += offset_amt; - next_seg.inner.vmaddr += offset_amt; - log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - next_seg.inner.segname, - next_seg.inner.fileoff, - next_seg.inner.fileoff + next_seg.inner.filesize, - next_seg.inner.vmaddr, - next_seg.inner.vmaddr + next_seg.inner.vmsize, - }); - - for (next_seg.sections.items) |*moved_sect, moved_sect_id| { - // TODO put below snippet in a function. 
- const moved_match = MatchingSection{
- .seg = @intCast(u16, next),
- .sect = @intCast(u16, moved_sect_id),
- };
- const ptr_sect_offset = self.getPtrToSectionOffset(moved_match);
- ptr_sect_offset.* += offset_amt;
- moved_sect.addr += offset_amt;
- log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{
- commands.segmentName(moved_sect.*),
- commands.sectionName(moved_sect.*),
- ptr_sect_offset.*,
- ptr_sect_offset.* + moved_sect.size,
- moved_sect.addr,
- moved_sect.addr + moved_sect.size,
- });
-
- try self.allocateLocalSymbols(moved_match, offset_amt);
- }
- }
- }
-
- text_section.offset = new_offset;
- text_section.addr = text_segment.inner.vmaddr + text_section.offset - text_segment.inner.fileoff;
- log.debug(" (found new __TEXT,__text free space from 0x{x} to 0x{x})", .{
- new_offset,
- new_offset + needed_size,
- });
- const offset_amt = @intCast(i64, text_section.addr) - @intCast(i64, old_base_addr);
- try self.allocateLocalSymbols(.{
- .seg = self.text_segment_cmd_index.?,
- .sect = self.text_section_index.?,
- }, offset_amt);
- vaddr = @intCast(u64, @intCast(i64, vaddr) + offset_amt);
- }
-
- text_section.size = needed_size;
- self.load_commands_dirty = true;
-
+ const needed_size = @intCast(u32, (vaddr + new_block_size) - text_section.addr);
+ try self.growSection(match, needed_size);
 _ = try self.blocks.put(self.base.allocator, match, text_block);
 }
 text_block.size = new_block_size;

From 81e5320973e8cffd585f240b20321e6afc15e8f9 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 6 Sep 2021 17:43:24 +0200
Subject: [PATCH 57/78] macho: set and reset file offset to zerofill sections
 in flush

This way, there's no need to special-case the set/reset behaviour at every
stage of updating sections/atoms.
---
 src/link/MachO.zig | 106 +++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 57 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 7c7995a5cc..ec5ce6aedd 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -133,8 +133,8 @@ objc_selrefs_section_index: ?u16 = null,
 objc_classrefs_section_index: ?u16 = null,
 objc_data_section_index: ?u16 = null,

-bss_file_offset: ?u32 = null,
-tlv_bss_file_offset: ?u32 = null,
+bss_file_offset: u32 = 0,
+tlv_bss_file_offset: u32 = 0,

 locals: std.ArrayListUnmanaged(macho.nlist_64) = .{},
 globals: std.ArrayListUnmanaged(macho.nlist_64) = .{},
@@ -749,6 +749,17 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 try self.parseInputFiles(positionals.items, self.base.options.sysroot);
 try self.parseLibs(libs.items, self.base.options.sysroot);

+ if (self.bss_section_index) |idx| {
+ const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+ const sect = &seg.sections.items[idx];
+ sect.offset = self.bss_file_offset;
+ }
+ if (self.tlv_bss_section_index) |idx| {
+ const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+ const sect = &seg.sections.items[idx];
+ sect.offset = self.tlv_bss_file_offset;
+ }
+
 try self.resolveSymbols();
 try self.addRpathLCs(rpath_table.keys());
 try self.addLoadDylibLCs();
@@ -781,6 +792,20 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 }
 }
 try self.writeAtoms();
+
+ if (self.bss_section_index) |idx| {
+ const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+ const sect = &seg.sections.items[idx];
+ self.bss_file_offset = sect.offset;
+ sect.offset = 0;
+ }
+ if (self.tlv_bss_section_index) |idx| {
+ const seg =
&self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = &seg.sections.items[idx]; + self.tlv_bss_file_offset = sect.offset; + sect.offset = 0; + } + try self.flushModule(comp); } @@ -1687,8 +1712,7 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; const sym = self.locals.items[atom.local_sym_index]; - const sect_offset = self.getPtrToSectionOffset(match).*; - const file_offset = sect_offset + sym.n_value - sect.addr; + const file_offset = sect.offset + sym.n_value - sect.addr; try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); @@ -3469,13 +3493,9 @@ pub fn populateMissingMetadata(self: *MachO) !void { .flags = macho.S_THREAD_LOCAL_ZEROFILL, }, ); - - // We keep offset to the section in a separate variable as the actual section is usually pointing at the - // beginning of the file. - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const out_sect = &seg.sections.items[self.tlv_bss_section_index.?]; - self.tlv_bss_file_offset = out_sect.offset; - out_sect.offset = 0; + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.tlv_bss_section_index.?]; + self.tlv_bss_file_offset = sect.offset; } if (self.bss_section_index == null) { @@ -3490,13 +3510,9 @@ pub fn populateMissingMetadata(self: *MachO) !void { .flags = macho.S_ZEROFILL, }, ); - - // We keep offset to the section in a separate variable as the actual section is usually pointing at the - // beginning of the file. 
- const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; - const out_sect = &seg.sections.items[self.bss_section_index.?]; - self.bss_file_offset = out_sect.offset; - out_sect.offset = 0; + const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment; + const sect = seg.sections.items[self.bss_section_index.?]; + self.bss_file_offset = sect.offset; } if (self.linkedit_segment_cmd_index == null) { @@ -3767,8 +3783,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { const sect = &seg.sections.items[match.sect]; const alignment = try math.powi(u32, 2, sect.@"align"); - const sect_offset = self.getPtrToSectionOffset(match); - const max_size = self.allocatedSize(match.seg, sect_offset.*); + const max_size = self.allocatedSize(match.seg, sect.offset); const ideal_size = padToIdeal(new_size); const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); @@ -3830,24 +3845,22 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { }); for (next_seg.sections.items) |*moved_sect, moved_sect_id| { - const moved_match = MatchingSection{ - .seg = @intCast(u16, next), - .sect = @intCast(u16, moved_sect_id), - }; - const ptr_sect_offset = self.getPtrToSectionOffset(moved_match); - ptr_sect_offset.* += @intCast(u32, seg_offset_amt); + moved_sect.offset += @intCast(u32, seg_offset_amt); moved_sect.addr += seg_offset_amt; log.warn(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ commands.segmentName(moved_sect.*), commands.sectionName(moved_sect.*), - ptr_sect_offset.*, - ptr_sect_offset.* + moved_sect.size, + moved_sect.offset, + moved_sect.offset + moved_sect.size, moved_sect.addr, moved_sect.addr + moved_sect.size, }); - try self.allocateLocalSymbols(moved_match, @intCast(i64, seg_offset_amt)); + try self.allocateLocalSymbols(.{ + .seg = @intCast(u16, next), + .sect = @intCast(u16, moved_sect_id), + }, @intCast(i64, seg_offset_amt)); } } } @@ -3867,25 +3880,23 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { var next = match.sect + 1; while (next < seg.sections.items.len) : (next += 1) { - const moved_match = MatchingSection{ - .seg = match.seg, - .sect = next, - }; const moved_sect = &seg.sections.items[next]; - const ptr_sect_offset = self.getPtrToSectionOffset(moved_match); - ptr_sect_offset.* += @intCast(u32, offset_amt); + moved_sect.offset += @intCast(u32, offset_amt); moved_sect.addr += offset_amt; log.warn(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ commands.segmentName(moved_sect.*), commands.sectionName(moved_sect.*), - ptr_sect_offset.*, - ptr_sect_offset.* + moved_sect.size, + moved_sect.offset, + moved_sect.offset + moved_sect.size, moved_sect.addr, moved_sect.addr + moved_sect.size, }); - try self.allocateLocalSymbols(moved_match, @intCast(i64, offset_amt)); + try self.allocateLocalSymbols(.{ + .seg = match.seg, + .sect = next, + }, @intCast(i64, offset_amt)); } } @@ -3916,25 +3927,6 @@ fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u3 return max_alignment; } -fn getPtrToSectionOffset(self: *MachO, match: MatchingSection) *u32 { - if (self.data_segment_cmd_index.? 
== match.seg) {
- if (self.bss_section_index) |idx| {
- if (idx == match.sect) {
- return &self.bss_file_offset.?;
- }
- }
- if (self.tlv_bss_section_index) |idx| {
- if (idx == match.sect) {
- return &self.tlv_bss_file_offset.?;
- }
- }
- }
-
- const seg = &self.load_commands.items[match.seg].Segment;
- const sect = &seg.sections.items[match.sect];
- return &sect.offset;
-}
-
 fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 {
 const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
 const text_section = &text_segment.sections.items[self.text_section_index.?];

From 6836cc473c3e75a71c0e6c0123c8afb23a79596d Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 6 Sep 2021 18:30:40 +0200
Subject: [PATCH 58/78] macho: make sure that parsed bss atoms are zero-filled

---
 src/link/MachO.zig | 12 ++++++------
 src/link/MachO/Object.zig | 25 ++++++++++++++++++++++---
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index ec5ce6aedd..8f89133378 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -1672,7 +1672,7 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64
 break :blk new_start_vaddr;
 } else sect.addr;

- log.warn("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr });
+ log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr });

 const expand_section = atom_placement == null or atom_placement.?.next == null;
 if (expand_section) {
@@ -3803,13 +3803,13 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void {
 if (last_sect_off + offset_amt > seg_off) {
 // Need to grow segment first.
- log.warn(" (need to grow segment first)", .{});
+ log.debug(" (need to grow segment first)", .{});
 const spill_size = (last_sect_off + offset_amt) - seg_off;
 const seg_offset_amt = mem.alignForwardGeneric(u64, spill_size, self.page_size);
 seg.inner.filesize += seg_offset_amt;
 seg.inner.vmsize += seg_offset_amt;

- log.warn(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{
+ log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{
 seg.inner.segname,
 seg.inner.fileoff,
 seg.inner.fileoff + seg.inner.filesize,
 seg.inner.vmaddr,
 seg.inner.vmaddr + seg.inner.vmsize,
 });
@@ -3836,7 +3836,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void {
 next_seg.inner.fileoff += seg_offset_amt;
 next_seg.inner.vmaddr += seg_offset_amt;

- log.warn(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{
+ log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{
 next_seg.inner.segname,
 next_seg.inner.fileoff,
 next_seg.inner.fileoff + next_seg.inner.filesize,
 next_seg.inner.vmaddr,
 next_seg.inner.vmaddr + next_seg.inner.vmsize,
 });
@@ -3848,7 +3848,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void {
 moved_sect.offset += @intCast(u32, seg_offset_amt);
 moved_sect.addr += seg_offset_amt;

- log.warn(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{
+ log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{
 commands.segmentName(moved_sect.*),
 commands.sectionName(moved_sect.*),
 moved_sect.offset,
 moved_sect.offset + moved_sect.size,
 moved_sect.addr,
 moved_sect.addr + moved_sect.size,
 });
@@ -3884,7 +3884,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void {
 moved_sect.offset += @intCast(u32, offset_amt);
 moved_sect.addr += offset_amt;

- log.warn(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))",
.{ + log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ commands.segmentName(moved_sect.*), commands.sectionName(moved_sect.*), moved_sect.offset, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index a3f0c56065..c72aa66b84 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -408,7 +408,14 @@ const TextBlockParser = struct { const block = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align); block.stab = stab; - mem.copy(u8, block.code.items, code); + + const is_zerofill = blk: { + const section_type = commands.sectionType(self.section); + break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; + }; + if (!is_zerofill) { + mem.copy(u8, block.code.items, code); + } try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); for (aliases.items) |alias| { @@ -567,7 +574,13 @@ pub fn parseTextBlocks( const block_size = block_code.len; const block = try macho_file.createEmptyAtom(block_local_sym_index, block_size, sect.@"align"); - mem.copy(u8, block.code.items, block_code); + const is_zerofill = blk: { + const section_type = commands.sectionType(sect); + break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; + }; + if (!is_zerofill) { + mem.copy(u8, block.code.items, block_code); + } try block.parseRelocs(relocs, .{ .base_addr = 0, @@ -667,7 +680,13 @@ pub fn parseTextBlocks( }; const block = try macho_file.createEmptyAtom(block_local_sym_index, sect.size, sect.@"align"); - mem.copy(u8, block.code.items, code); + const is_zerofill = blk: { + const section_type = commands.sectionType(sect); + break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; + }; + if (!is_zerofill) { + mem.copy(u8, block.code.items, code); + } try block.parseRelocs(relocs, .{ .base_addr = 0, From c35f668932df4d339b069acf0dbfccfe2faa56b6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 7 Sep 2021 10:06:01 +0200 Subject: [PATCH 59/78] macho: update max section alignment when inserting atoms --- src/link/MachO.zig | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8f89133378..0f4270c344 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1678,7 +1678,11 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 if (expand_section) { const needed_size = @intCast(u32, (vaddr + atom.size) - sect.addr); try self.growSection(match, needed_size); + sect.size = needed_size; + self.load_commands_dirty = true; } + sect.@"align" = math.max(sect.@"align", atom.alignment); + const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); sym.n_value = vaddr; sym.n_sect = n_sect; @@ -3899,15 +3903,13 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { }, @intCast(i64, offset_amt)); } } - - sect.size = new_size; - self.load_commands_dirty = true; } fn allocatedSize(self: MachO, segment_id: u16, start: u64) u64 { const seg = self.load_commands.items[segment_id].Segment; assert(start >= seg.inner.fileoff); var min_pos: u64 = seg.inner.fileoff + seg.inner.filesize; + if (start > min_pos) return 0; for (seg.sections.items) |section| { if (section.offset <= start) continue; if (section.offset < min_pos) min_pos = section.offset; @@ -4003,7 +4005,11 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const needed_size = @intCast(u32, (vaddr + new_block_size) - text_section.addr); try self.growSection(match, needed_size); _ = try self.blocks.put(self.base.allocator, match, text_block); + text_section.size = needed_size; + self.load_commands_dirty = true; } + const align_pow = @intCast(u32, math.log2(alignment)); + text_section.@"align" = math.max(text_section.@"align", align_pow); text_block.size = new_block_size; if (text_block.prev) |prev| { From a2279e7e209b5dab096af186fc692bd2acf5f4c8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 7 Sep 2021 12:23:26 +0200 Subject: [PATCH 60/78] macho: fix commands.zig tests --- src/link/MachO/commands.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig index 77d2b942a8..d9ca056c8e 100644 --- a/src/link/MachO/commands.zig +++ b/src/link/MachO/commands.zig @@ -436,7 +436,7 @@ test "read-write segment command" { 0x00, 0x00, 0x00, 0x00, // reserved3 }; var cmd = SegmentCommand{ - .inner = macho.segment_command_64.new(.{ + .inner = .{ .cmdsize = 152, .segname = makeStaticString("__TEXT"), .vmaddr = 4294967296, @@ -445,9 +445,9 @@ test "read-write segment command" { .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE, .initprot = macho.VM_PROT_EXECUTE | macho.VM_PROT_READ, .nsects = 1, - }), + }, }; - try cmd.sections.append(gpa, macho.section_64.new(.{ + try cmd.sections.append(gpa, .{ .sectname = makeStaticString("__text"), .segname = makeStaticString("__TEXT"), .addr = 4294983680, @@ -455,7 +455,7 @@ test "read-write segment command" { .offset = 16384, .@"align" = 2, .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - })); + }); defer cmd.deinit(gpa); try testRead(gpa, in_buffer, LoadCommand{ .Segment = cmd }); From 159e55dfd9b03a11f5eb1c79ec5d56cdfd6f7707 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 7 Sep 2021 20:18:48 +0200 Subject: [PATCH 61/78] macho: fix alignment of atoms which begin the section --- src/link/MachO.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 0f4270c344..b60c29ecd0 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1660,17 +1660,20 @@ pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 const needs_padding = match.seg == self.text_segment_cmd_index.? 
and match.sect == self.text_section_index.?;

 var atom_placement: ?*TextBlock = null;
+ const atom_alignment = try math.powi(u32, 2, atom.alignment);

 // TODO converge with `allocateTextBlock` and handle free list
 var vaddr = if (self.blocks.get(match)) |last| blk: {
 const last_atom_sym = self.locals.items[last.local_sym_index];
 const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size;
 const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity;
- const last_atom_alignment = try math.powi(u32, 2, atom.alignment);
- const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment);
+ const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, atom_alignment);
 atom_placement = last;
 break :blk new_start_vaddr;
- } else sect.addr;
+ } else mem.alignForwardGeneric(u64, sect.addr, atom_alignment);
+
+ // TODO what if the section which was preallocated is not aligned to the maximum (section) alignment?
+ // Should we move the section?

 log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr });

From e229202cb87a895401f2813f0f12227a0d715c09 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 7 Sep 2021 23:21:08 +0200
Subject: [PATCH 62/78] macho: store source section address of relocs in
 context

This is particularly relevant for x86_64 and C++ when relocating StaticInit
sections that contain static-initializer machine code. For SIGNED_X
relocations, it is then necessary to have the full picture of the sections'
VM address layout within the object file, since that is how the addend
needs to be adjusted for non-extern relocations.
---
 src/link/MachO/Object.zig | 9 ++++++---
 src/link/MachO/TextBlock.zig | 23 ++++++++++++-----------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index c72aa66b84..d94785b377 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -425,7 +425,8 @@ const TextBlockParser = struct {
 }

 try block.parseRelocs(self.relocs, .{
- .base_addr = start_addr,
+ .base_addr = self.section.addr,
+ .base_offset = start_addr,
 .allocator = context.allocator,
 .object = context.object,
 .macho_file = context.macho_file,
@@ -583,7 +584,8 @@ pub fn parseTextBlocks(
 }

 try block.parseRelocs(relocs, .{
- .base_addr = 0,
+ .base_addr = sect.addr,
+ .base_offset = 0,
 .allocator = allocator,
 .object = self,
 .macho_file = macho_file,
@@ -689,7 +691,8 @@ pub fn parseTextBlocks(
 }

 try block.parseRelocs(relocs, .{
- .base_addr = 0,
+ .base_addr = sect.addr,
+ .base_offset = 0,
 .allocator = allocator,
 .object = self,
 .macho_file = macho_file,

diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig
index 3517e532a0..dca319fedd 100644
--- a/src/link/MachO/TextBlock.zig
+++ b/src/link/MachO/TextBlock.zig
@@ -479,13 +479,13 @@ pub const Relocation = struct {
 pub const Signed = struct {
 addend: i64,
- correction: i4,
+ correction: u3,

 pub fn resolve(self: Signed, args: ResolveArgs) !void {
 const target_addr = @intCast(i64, args.target_addr) + self.addend;
 const displacement = try math.cast(
 i32,
- target_addr - @intCast(i64, args.source_addr) - self.correction - 4,
+ target_addr - @intCast(i64, args.source_addr + self.correction + 4),
 );
 mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement));
 }
@@ -613,6 +613,7 @@ pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool {

 const RelocContext = struct {
 base_addr: u64 = 0,
+
base_offset: u64 = 0, allocator: *Allocator, object: *Object, macho_file: *MachO, @@ -620,7 +621,7 @@ const RelocContext = struct { fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation { var parsed_rel = Relocation{ - .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_addr), + .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_offset), .where = undefined, .where_index = undefined, .payload = undefined, @@ -684,7 +685,7 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc } pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: RelocContext) !void { - const filtered_relocs = filterRelocs(relocs, context.base_addr, context.base_addr + self.size); + const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size); var it = RelocIterator{ .buffer = filtered_relocs, }; @@ -956,9 +957,9 @@ fn parseUnsigned( mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); if (rel.r_extern == 0) { - const source_seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; - const source_sect_base_addr = source_seg.sections.items[rel.r_symbolnum - 1].addr; - addend -= @intCast(i64, source_sect_base_addr); + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + addend -= @intCast(i64, target_sect_base_addr); } out.payload = .{ @@ -1043,7 +1044,7 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co assert(rel.r_length == 2); const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const correction: i4 = switch (rel_type) { + const correction: u3 = switch (rel_type) { .X86_64_RELOC_SIGNED => 0, .X86_64_RELOC_SIGNED_1 => 1, .X86_64_RELOC_SIGNED_2 => 2, @@ -1053,9 +1054,9 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; if (rel.r_extern == 0) { - const source_seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; - const source_sect_base_addr = source_seg.sections.items[rel.r_symbolnum - 1].addr; - addend = @intCast(i64, out.offset) + addend - @intCast(i64, source_sect_base_addr) + 4 + correction; + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + addend += @intCast(i64, context.base_addr + out.offset + correction + 4) - @intCast(i64, target_sect_base_addr); } out.payload = .{ From e00b9d6192a09305c4160ccbeeb761a1f1af855a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 8 Sep 2021 13:17:43 +0200 Subject: [PATCH 63/78] macho: use smaller padding until we have branch islands on arm64 Without branch islands, it is impossible to link self-hosted using the common linker path. 
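
For context on the jump-distance limit behind this change: the AArch64 B/BL instruction encodes a signed 26-bit word offset, so a direct branch can only reach targets whose byte displacement fits in an i28, roughly +-128 MiB either way. The following self-contained sketch illustrates that range check; fitsInBranchRange is a hypothetical helper for illustration, not a function in the linker:

const std = @import("std");

// B and BL on AArch64 encode a signed 26-bit word offset, i.e. a byte
// displacement that must fit in an i28 and be 4-byte aligned.
fn fitsInBranchRange(source_addr: u64, target_addr: u64) bool {
    const disp = @intCast(i64, target_addr) - @intCast(i64, source_addr);
    return disp >= std.math.minInt(i28) and disp <= std.math.maxInt(i28);
}

pub fn main() void {
    const source: u64 = 0x100000000;
    // 16 KiB away: a direct branch reaches it.
    std.debug.print("{}\n", .{fitsInBranchRange(source, source + 0x4000)});
    // ~144 MiB away: out of range, so a branch island (a small trampoline
    // placed within range of both ends) would be required.
    std.debug.print("{}\n", .{fitsInBranchRange(source, source + 0x9000000)});
}

Until the linker can synthesize such islands, shrinking the inter-atom padding keeps callers and callees from drifting out of direct-branch range, which is what this patch does.
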
--- src/link/MachO.zig | 2 +- src/link/MachO/TextBlock.zig | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b60c29ecd0..927334f880 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -262,7 +262,7 @@ pub const GotIndirectionKey = struct { /// When allocating, the ideal_capacity is calculated by /// actual_capacity + (actual_capacity / ideal_factor) -const ideal_factor = 2; +const ideal_factor = 4; /// Default path to dyld const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index dca319fedd..97bff8be31 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -243,10 +243,21 @@ pub const Relocation = struct { pub fn resolve(self: Branch, args: ResolveArgs) !void { switch (self.arch) { .aarch64 => { - const displacement = try math.cast( + const displacement = math.cast( i28, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), - ); + ) catch |err| switch (err) { + error.Overflow => { + log.err("jump too big to encode as i28 displacement value", .{}); + log.err(" (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{ + args.target_addr, + args.source_addr, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + }); + log.err(" TODO implement branch islands to extend jump distance for arm64", .{}); + return error.TODOImplementBranchIslands; + }, + }; const code = args.block.code.items[args.offset..][0..4]; var inst = aarch64.Instruction{ .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( From 9fb44e8e1fa4f07e44e2d354e09db9d953861e71 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 9 Sep 2021 00:25:55 +0200 Subject: [PATCH 64/78] macho: precompute total required size when parsing objects This way, we can preallocate the necessary sizes for segments and sections upfront rather than doing it per parsed atom. 
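
The precomputation itself is a single pass over the input sections: for every output section, sum the alignment-padded input sizes and track the maximum alignment seen. Here is a minimal, self-contained sketch of that accumulation, using a plain integer key where the linker uses MatchingSection and the *Allocator-style std API this series is written against; the Metadata name and the sample inputs are made up for illustration:

const std = @import("std");

const Metadata = struct { size: u64, alignment: u32 };

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    // The key stands in for MatchingSection (an output segment/section pair).
    var section_metadata = std.AutoHashMap(u16, Metadata).init(&gpa.allocator);
    defer section_metadata.deinit();

    // (output section, size, log2 alignment) triples standing in for the
    // sections of the parsed object files.
    const inputs = [_][3]u64{ .{ 0, 0x30, 2 }, .{ 0, 0x11, 4 }, .{ 1, 0x8, 3 } };

    for (inputs) |input| {
        const res = try section_metadata.getOrPut(@intCast(u16, input[0]));
        if (!res.found_existing) {
            res.value_ptr.* = .{ .size = 0, .alignment = 0 };
        }
        // Pad each input section to its own alignment before adding it in,
        // and keep the strictest alignment for the output section header.
        const alignment = try std.math.powi(u64, 2, input[2]);
        res.value_ptr.size += std.mem.alignForwardGeneric(u64, input[1], alignment);
        res.value_ptr.alignment = std.math.max(res.value_ptr.alignment, @intCast(u32, input[2]));
    }

    var it = section_metadata.iterator();
    while (it.next()) |entry| {
        // With the inputs above: section 0 => 0x50 bytes, section 1 => 0x8.
        std.debug.print("section {d}: size 0x{x}, align 2^{d}\n", .{
            entry.key_ptr.*,
            entry.value_ptr.size,
            entry.value_ptr.alignment,
        });
    }
}

Each output section can then be grown once to its final size (growSection in the diff below) before any atom is allocated into it.
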
---
 src/link/MachO.zig | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 927334f880..808d96b767 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -2589,6 +2589,47 @@ fn resolveDyldStubBinder(self: *MachO) !void {
 }

 fn parseTextBlocks(self: *MachO) !void {
+ var section_metadata = std.AutoHashMap(MatchingSection, struct {
+ size: u64,
+ alignment: u32,
+ }).init(self.base.allocator);
+ defer section_metadata.deinit();
+
+ for (self.objects.items) |object| {
+ const seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
+ for (seg.sections.items) |sect| {
+ const match = (try self.getMatchingSection(sect)) orelse {
+ log.debug("unhandled section", .{});
+ continue;
+ };
+ const res = try section_metadata.getOrPut(match);
+ if (!res.found_existing) {
+ res.value_ptr.* = .{
+ .size = 0,
+ .alignment = 0,
+ };
+ }
+ const alignment = try math.powi(u32, 2, sect.@"align");
+ res.value_ptr.size += mem.alignForwardGeneric(u64, sect.size, alignment);
+ res.value_ptr.alignment = math.max(res.value_ptr.alignment, sect.@"align");
+ }
+ }
+
+ var it = section_metadata.iterator();
+ while (it.next()) |entry| {
+ const match = entry.key_ptr.*;
+ const metadata = entry.value_ptr.*;
+ const seg = self.load_commands.items[match.seg].Segment;
+ const sect = seg.sections.items[match.sect];
+ log.debug("{s},{s} => size: 0x{x}, alignment: 0x{x}", .{
+ commands.segmentName(sect),
+ commands.sectionName(sect),
+ metadata.size,
+ metadata.alignment,
+ });
+ try self.growSection(match, @intCast(u32, metadata.size));
+ }
+
 for (self.objects.items) |*object, object_id| {
 try object.parseTextBlocks(self.base.allocator, @intCast(u16, object_id), self);
 }

From 23be9cae346614d7d42d9c1c6426bfe2d6721c68 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Thu, 9 Sep 2021 01:13:07 +0200
Subject: [PATCH 65/78] macho: padToIdeal each parsed section size before
 storing

This way, we should not need to grow the sections at all when allocating
the atoms parsed from the objects' sections.
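
The "ideal" size referenced here follows the rule quoted earlier in this series: ideal_capacity = actual_capacity + (actual_capacity / ideal_factor), with ideal_factor raised to 4 two patches back. A small sketch of what padToIdeal computes under that rule (reconstructed from that comment; the real definition lives in MachO.zig):

const std = @import("std");

const ideal_factor = 4;

// ideal_capacity = actual_capacity + (actual_capacity / ideal_factor):
// reserve a proportional surplus so trailing atoms can grow in place
// before any section has to be moved within the output file.
fn padToIdeal(actual_size: u64) u64 {
    return actual_size + actual_size / ideal_factor;
}

pub fn main() void {
    // A 0x40-byte input section is accounted for as 0x50 bytes upfront.
    std.debug.print("0x{x}\n", .{padToIdeal(0x40)});
}

Storing the padded sizes during the precomputation means the sections start out with the same surplus the atom allocator assumes, so later growth should be rare.
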
--- src/link/MachO.zig | 60 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 808d96b767..909f9b3a9f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1769,11 +1769,16 @@ fn writeAtoms(self: *MachO) !void { const match = entry.key_ptr.*; var atom: *TextBlock = entry.value_ptr.*; - while (atom.prev) |prev| { - try self.writeAtom(atom, match); - atom = prev; + while (true) { + if (atom.dirty) { + try self.writeAtom(atom, match); + atom.dirty = false; + } + + if (atom.prev) |prev| { + atom = prev; + } else break; } - try self.writeAtom(atom, match); } } @@ -2609,8 +2614,9 @@ fn parseTextBlocks(self: *MachO) !void { .alignment = 0, }; } + const size = padToIdeal(sect.size); const alignment = try math.powi(u32, 2, sect.@"align"); - res.value_ptr.size += mem.alignForwardGeneric(u64, sect.size, alignment); + res.value_ptr.size += mem.alignForwardGeneric(u64, size, alignment); res.value_ptr.alignment = math.max(res.value_ptr.alignment, sect.@"align"); } } @@ -2633,6 +2639,49 @@ fn parseTextBlocks(self: *MachO) !void { for (self.objects.items) |*object, object_id| { try object.parseTextBlocks(self.base.allocator, @intCast(u16, object_id), self); } + + // it = section_metadata.iterator(); + // while (it.next()) |entry| { + // const match = entry.key_ptr.*; + // const metadata = entry.value_ptr.*; + // const seg = self.load_commands.items[match.seg].Segment; + // const sect = seg.sections.items[match.sect]; + + // var buffer = try self.base.allocator.alloc(u8, metadata.size); + // defer self.base.allocator.free(buffer); + // log.warn("{s},{s} buffer size 0x{x}", .{ + // commands.segmentName(sect), + // commands.sectionName(sect), + // metadata.size, + // }); + + // var atom = self.blocks.get(match).?; + + // while (atom.prev) |prev| { + // atom = prev; + // } + + // const base = blk: { + // const sym = self.locals.items[atom.local_sym_index]; + // break :blk sym.n_value; + // }; + + // while (true) { + // const sym = self.locals.items[atom.local_sym_index]; + // const offset = sym.n_value - base; + // try atom.resolveRelocs(self); + // log.warn("writing atom for symbol {s} at buffer offset 0x{x}", .{ + // self.getString(sym.n_strx), + // offset, + // }); + // mem.copy(u8, buffer[offset..][0..atom.code.items.len], atom.code.items); + // atom.dirty = false; + + // if (atom.next) |next| { + // atom = next; + // } else break; + // } + // } } fn addDataInCodeLC(self: *MachO) !void { @@ -3836,6 +3885,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); if (needed_size > max_size) blk: { + log.debug(" (need to grow!)", .{}); // Need to move all sections below in file and address spaces. 
const offset_amt = offset: { const max_alignment = try self.getSectionMaxAlignment(match.seg, match.sect + 1); From 1efdb137d14058f3c428a001838b963de16694ea Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 9 Sep 2021 14:18:28 +0200 Subject: [PATCH 66/78] macho: don't allocate atoms when parsing objects --- src/link/MachO.zig | 170 +++++++++++++++++++++-------------- src/link/MachO/Object.zig | 36 +++++++- src/link/MachO/TextBlock.zig | 54 +++++++++-- 3 files changed, 182 insertions(+), 78 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 909f9b3a9f..4a8df9ae6c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2594,30 +2594,64 @@ fn resolveDyldStubBinder(self: *MachO) !void { } fn parseTextBlocks(self: *MachO) !void { + var parsed_atoms = Object.ParsedAtoms.init(self.base.allocator); + defer parsed_atoms.deinit(); + + var first_atoms = Object.ParsedAtoms.init(self.base.allocator); + defer first_atoms.deinit(); + var section_metadata = std.AutoHashMap(MatchingSection, struct { size: u64, alignment: u32, }).init(self.base.allocator); defer section_metadata.deinit(); - for (self.objects.items) |object| { - const seg = object.load_commands.items[object.segment_cmd_index.?].Segment; - for (seg.sections.items) |sect| { - const match = (try self.getMatchingSection(sect)) orelse { - log.debug("unhandled section", .{}); - continue; - }; - const res = try section_metadata.getOrPut(match); - if (!res.found_existing) { - res.value_ptr.* = .{ + for (self.objects.items) |*object, object_id| { + var atoms_in_objects = try object.parseTextBlocks(self.base.allocator, @intCast(u16, object_id), self); + defer atoms_in_objects.deinit(); + + var it = atoms_in_objects.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + const last_atom = entry.value_ptr.*; + var atom = last_atom; + + const metadata = try section_metadata.getOrPut(match); + if (!metadata.found_existing) { + metadata.value_ptr.* = .{ .size = 0, .alignment = 0, }; } - const size = padToIdeal(sect.size); - const alignment = try math.powi(u32, 2, sect.@"align"); - res.value_ptr.size += mem.alignForwardGeneric(u64, size, alignment); - res.value_ptr.alignment = math.max(res.value_ptr.alignment, sect.@"align"); + + while (true) { + const alignment = try math.powi(u32, 2, atom.alignment); + metadata.value_ptr.size += mem.alignForwardGeneric(u64, atom.size, alignment); + metadata.value_ptr.alignment = math.max(metadata.value_ptr.alignment, atom.alignment); + + const sym = self.locals.items[atom.local_sym_index]; + log.debug(" {s}: n_value=0x{x}, size=0x{x}, alignment=0x{x}", .{ + self.getString(sym.n_strx), + sym.n_value, + atom.size, + atom.alignment, + }); + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + + if (parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } + _ = try parsed_atoms.put(match, last_atom); + + if (!first_atoms.contains(match)) { + try first_atoms.putNoClobber(match, atom); + } } } @@ -2625,63 +2659,69 @@ fn parseTextBlocks(self: *MachO) !void { while (it.next()) |entry| { const match = entry.key_ptr.*; const metadata = entry.value_ptr.*; - const seg = self.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; log.debug("{s},{s} => size: 0x{x}, alignment: 0x{x}", .{ - commands.segmentName(sect), - commands.sectionName(sect), + commands.segmentName(sect.*), + 
commands.sectionName(sect.*), metadata.size, metadata.alignment, }); + sect.@"align" = math.max(sect.@"align", metadata.alignment); try self.growSection(match, @intCast(u32, metadata.size)); + + var base_vaddr = if (self.blocks.get(match)) |last| blk: { + const last_atom_sym = self.locals.items[last.local_sym_index]; + break :blk last_atom_sym.n_value + last.size; + } else sect.addr; + const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + + var atom = first_atoms.get(match).?; + while (true) { + const alignment = try math.powi(u32, 2, atom.alignment); + base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); + + const sym = &self.locals.items[atom.local_sym_index]; + sym.n_value = base_vaddr; + sym.n_sect = n_sect; + + log.debug(" {s}: start=0x{x}, end=0x{x}, size=0x{x}, alignment=0x{x}", .{ + self.getString(sym.n_strx), + base_vaddr, + base_vaddr + atom.size, + atom.size, + atom.alignment, + }); + + // Update each alias (if any) + for (atom.aliases.items) |index| { + const alias_sym = &self.locals.items[index]; + alias_sym.n_value = base_vaddr; + alias_sym.n_sect = n_sect; + } + + // Update each symbol contained within the TextBlock + for (atom.contained.items) |sym_at_off| { + const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + contained_sym.n_value = base_vaddr + sym_at_off.offset; + contained_sym.n_sect = n_sect; + } + + base_vaddr += atom.size; + + if (atom.next) |next| { + atom = next; + } else break; + } + + if (self.blocks.getPtr(match)) |last| { + const first_atom = first_atoms.get(match).?; + last.*.next = first_atom; + first_atom.prev = last.*; + last.* = first_atom; + } + _ = try self.blocks.put(self.base.allocator, match, parsed_atoms.get(match).?); } - - for (self.objects.items) |*object, object_id| { - try object.parseTextBlocks(self.base.allocator, @intCast(u16, object_id), self); - } - - // it = section_metadata.iterator(); - // while (it.next()) |entry| { - // const match = entry.key_ptr.*; - // const metadata = entry.value_ptr.*; - // const seg = self.load_commands.items[match.seg].Segment; - // const sect = seg.sections.items[match.sect]; - - // var buffer = try self.base.allocator.alloc(u8, metadata.size); - // defer self.base.allocator.free(buffer); - // log.warn("{s},{s} buffer size 0x{x}", .{ - // commands.segmentName(sect), - // commands.sectionName(sect), - // metadata.size, - // }); - - // var atom = self.blocks.get(match).?; - - // while (atom.prev) |prev| { - // atom = prev; - // } - - // const base = blk: { - // const sym = self.locals.items[atom.local_sym_index]; - // break :blk sym.n_value; - // }; - - // while (true) { - // const sym = self.locals.items[atom.local_sym_index]; - // const offset = sym.n_value - base; - // try atom.resolveRelocs(self); - // log.warn("writing atom for symbol {s} at buffer offset 0x{x}", .{ - // self.getString(sym.n_strx), - // offset, - // }); - // mem.copy(u8, buffer[offset..][0..atom.code.items.len], atom.code.items); - // atom.dirty = false; - - // if (atom.next) |next| { - // atom = next; - // } else break; - // } - // } } fn addDataInCodeLC(self: *MachO) !void { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index d94785b377..3bfd6c9f1a 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -317,6 +317,7 @@ const Context = struct { object: *Object, macho_file: *MachO, match: MachO.MatchingSection, + parsed_atoms: *ParsedAtoms, }; const TextBlockParser = struct { @@ -430,6 +431,7 @@ const TextBlockParser = struct { .allocator = 
context.allocator, .object = context.object, .macho_file = context.macho_file, + .parsed_atoms = context.parsed_atoms, }); if (context.macho_file.has_dices) { @@ -455,12 +457,15 @@ const TextBlockParser = struct { } }; +pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *TextBlock); + pub fn parseTextBlocks( self: *Object, allocator: *Allocator, object_id: u16, macho_file: *MachO, -) !void { +) !ParsedAtoms { + var parsed_atoms = ParsedAtoms.init(allocator); const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; log.debug("analysing {s}", .{self.name}); @@ -589,6 +594,7 @@ pub fn parseTextBlocks( .allocator = allocator, .object = self, .macho_file = macho_file, + .parsed_atoms = &parsed_atoms, }); if (macho_file.has_dices) { @@ -604,7 +610,13 @@ pub fn parseTextBlocks( } } - _ = try macho_file.allocateAtom(block, match); + if (parsed_atoms.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try parsed_atoms.putNoClobber(match, block); + } try self.text_blocks.append(allocator, block); } @@ -620,6 +632,7 @@ pub fn parseTextBlocks( .object = self, .macho_file = macho_file, .match = match, + .parsed_atoms = &parsed_atoms, })) |block| { const sym = macho_file.locals.items[block.local_sym_index]; const is_ext = blk: { @@ -651,7 +664,13 @@ pub fn parseTextBlocks( } } - _ = try macho_file.allocateAtom(block, match); + if (parsed_atoms.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try parsed_atoms.putNoClobber(match, block); + } try self.text_blocks.append(allocator, block); } @@ -696,6 +715,7 @@ pub fn parseTextBlocks( .allocator = allocator, .object = self, .macho_file = macho_file, + .parsed_atoms = &parsed_atoms, }); if (macho_file.has_dices) { @@ -747,10 +767,18 @@ pub fn parseTextBlocks( }); } - _ = try macho_file.allocateAtom(block, match); + if (parsed_atoms.getPtr(match)) |last| { + last.*.next = block; + block.prev = last.*; + last.* = block; + } else { + try parsed_atoms.putNoClobber(match, block); + } try self.text_blocks.append(allocator, block); } } + + return parsed_atoms; } fn parseSymtab(self: *Object, allocator: *Allocator) !void { diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 97bff8be31..57e93543b1 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -628,6 +628,7 @@ const RelocContext = struct { allocator: *Allocator, object: *Object, macho_file: *MachO, + parsed_atoms: *Object.ParsedAtoms, }; fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation { @@ -855,7 +856,14 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R .seg = context.macho_file.data_const_segment_cmd_index.?, .sect = context.macho_file.got_section_index.?, }; - _ = try context.macho_file.allocateAtom(atom, match); + + if (context.parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } else { + try context.parsed_atoms.putNoClobber(match, atom); + } } else if (parsed_rel.payload == .unsigned) { switch (parsed_rel.where) { .undef => { @@ -918,18 +926,46 @@ pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: R ); const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index); try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom); - _ = try context.macho_file.allocateAtom(stub_helper_atom, .{ + // TODO clean this up! 
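// (editor's note) The TODO above presumably refers to the append-to-tail dance this
// patch repeats for every atom it creates. A minimal sketch of a helper that would
// fold the repeated blocks below into one call, assuming the ParsedAtoms map and the
// prev/next links introduced in this patch; the helper name is hypothetical and not
// part of the change:
//
//     fn appendAtomToSection(
//         parsed_atoms: *Object.ParsedAtoms,
//         match: MachO.MatchingSection,
//         atom: *TextBlock,
//     ) !void {
//         if (parsed_atoms.getPtr(match)) |last| {
//             last.*.next = atom; // link behind the current tail...
//             atom.prev = last.*;
//             last.* = atom; // ...and record the new tail
//         } else {
//             try parsed_atoms.putNoClobber(match, atom);
//         }
//     }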
+ if (context.parsed_atoms.getPtr(.{ .seg = context.macho_file.text_segment_cmd_index.?, .sect = context.macho_file.stub_helper_section_index.?, - }); - _ = try context.macho_file.allocateAtom(laptr_atom, .{ - .seg = context.macho_file.data_segment_cmd_index.?, - .sect = context.macho_file.la_symbol_ptr_section_index.?, - }); - _ = try context.macho_file.allocateAtom(stub_atom, .{ + })) |last| { + last.*.next = stub_helper_atom; + stub_helper_atom.prev = last.*; + last.* = stub_helper_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + }, stub_helper_atom); + } + if (context.parsed_atoms.getPtr(.{ .seg = context.macho_file.text_segment_cmd_index.?, .sect = context.macho_file.stubs_section_index.?, - }); + })) |last| { + last.*.next = stub_atom; + stub_atom.prev = last.*; + last.* = stub_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + }, stub_atom); + } + if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + })) |last| { + last.*.next = laptr_atom; + laptr_atom.prev = last.*; + last.* = laptr_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + }, laptr_atom); + } } } } From 56fdada577d5d7f871bed8e5ae74e395291d4140 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 9 Sep 2021 16:55:56 +0200 Subject: [PATCH 67/78] macho: properly adjust section sizes --- src/link/MachO.zig | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4a8df9ae6c..2ccedd70ea 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2667,8 +2667,16 @@ fn parseTextBlocks(self: *MachO) !void { metadata.size, metadata.alignment, }); + + const sect_size = if (self.blocks.get(match)) |last| blk: { + const last_atom_sym = self.locals.items[last.local_sym_index]; + break :blk last_atom_sym.n_value + last.size - sect.addr; + } else 0; + sect.@"align" = math.max(sect.@"align", metadata.alignment); - try self.growSection(match, @intCast(u32, metadata.size)); + const needed_size = @intCast(u32, metadata.size + sect_size); + try self.growSection(match, needed_size); + sect.size = needed_size; var base_vaddr = if (self.blocks.get(match)) |last| blk: { const last_atom_sym = self.locals.items[last.local_sym_index]; From aaacfc0d0a23918c6712272e10bb1cdca1daaf04 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 9 Sep 2021 18:32:03 +0200 Subject: [PATCH 68/78] macho: init process of renaming TextBlock to Atom Initially, internally within the linker. 
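(editor's note) Before the rename below, a worked example of the address bookkeeping
the previous two patches introduced may help: alignment is stored as a power of two,
so each atom lands at the section cursor rounded up to 2^alignment, and the cursor
then advances by the atom's size. A self-contained sketch with invented sizes and a
hypothetical base address standing in for sect.addr:

const std = @import("std");

pub fn main() void {
    // Two hypothetical atoms with invented sizes and power-of-two alignments.
    const atoms = [_]struct { size: u64, alignment: u32 }{
        .{ .size = 0x7, .alignment = 0 }, // 2^0 = 1-byte aligned
        .{ .size = 0x10, .alignment = 4 }, // 2^4 = 16-byte aligned
    };
    var base_vaddr: u64 = 0x1000000; // stand-in for sect.addr
    for (atoms) |atom| {
        const alignment = std.math.powi(u64, 2, atom.alignment) catch unreachable;
        base_vaddr = std.mem.alignForwardGeneric(u64, base_vaddr, alignment);
        // Prints 0x1000000..0x1000007, then 0x1000010..0x1000020 (aligned up from 0x1000007).
        std.debug.print("atom at 0x{x}..0x{x}\n", .{ base_vaddr, base_vaddr + atom.size });
        base_vaddr += atom.size;
    }
    // The section's needed size is the final cursor minus sect.addr: 0x20 here.
}

This is the same arithmetic the per-section loop above performs, and it is what the
previous patch uses to size sections from the laid-out atoms rather than from the
earlier padded estimates.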
---
 CMakeLists.txt                             |  2 +-
 src/link/MachO.zig                         |  4 +-
 src/link/MachO/{TextBlock.zig => Atom.zig} | 68 ++++++++++++----------
 src/link/MachO/Object.zig                  |  4 +-
 4 files changed, 43 insertions(+), 35 deletions(-)
 rename src/link/MachO/{TextBlock.zig => Atom.zig} (96%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0e4d5cfb43..b6aa3cfaea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -577,11 +577,11 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/Atom.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/CodeSignature.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
-    "${CMAKE_SOURCE_DIR}/src/link/MachO/TextBlock.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig"
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 2ccedd70ea..2705d47a85 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -24,6 +24,7 @@ const trace = @import("../tracy.zig").trace;
 const Air = @import("../Air.zig");
 const Allocator = mem.Allocator;
 const Archive = @import("MachO/Archive.zig");
+const Atom = @import("MachO/Atom.zig");
 const Cache = @import("../Cache.zig");
 const CodeSignature = @import("MachO/CodeSignature.zig");
 const Compilation = @import("../Compilation.zig");
@@ -37,9 +38,10 @@ const LlvmObject = @import("../codegen/llvm.zig").Object;
 const LoadCommand = commands.LoadCommand;
 const Module = @import("../Module.zig");
 const SegmentCommand = commands.SegmentCommand;
-pub const TextBlock = @import("MachO/TextBlock.zig");
 const Trie = @import("MachO/Trie.zig");

+pub const TextBlock = Atom;
+
 pub const base_tag: File.Tag = File.Tag.macho;

 base: File,
diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/Atom.zig
similarity index 96%
rename from src/link/MachO/TextBlock.zig
rename to src/link/MachO/Atom.zig
index 57e93543b1..41e34bc6f7 100644
--- a/src/link/MachO/TextBlock.zig
+++ b/src/link/MachO/Atom.zig
@@ -1,4 +1,4 @@
-const TextBlock = @This();
+const Atom = @This();

 const std = @import("std");
 const build_options = @import("build_options");
@@ -24,28 +24,32 @@ const Object = @import("Object.zig");
 /// offset table entry.
 local_sym_index: u32,

-/// List of symbol aliases pointing to the same block via different nlists
+/// List of symbol aliases pointing to the same atom via different nlists
 aliases: std.ArrayListUnmanaged(u32) = .{},

-/// List of symbols contained within this block
+/// List of symbols contained within this atom
 contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},

-/// Code (may be non-relocated) this block represents
+/// Code (may be non-relocated) this atom represents
 code: std.ArrayListUnmanaged(u8) = .{},

-/// Size and alignment of this text block
+/// Size and alignment of this atom
 /// Unlike in Elf, we need to store the size of this symbol as part of
-/// the TextBlock since macho.nlist_64 lacks this information.
+/// the atom since macho.nlist_64 lacks this information.
 size: u64,
+
+/// Alignment of this atom as a power of 2.
+/// For instance, an alignment of 0 should be read as 2^0 = 1 byte aligned.
 alignment: u32,

+/// List of relocations belonging to this atom.
relocs: std.ArrayListUnmanaged(Relocation) = .{}, -/// List of offsets contained within this block that need rebasing by the dynamic -/// loader in presence of ASLR +/// List of offsets contained within this atom that need rebasing by the dynamic +/// loader in presence of ASLR. rebases: std.ArrayListUnmanaged(u64) = .{}, -/// List of offsets contained within this block that will be dynamically bound +/// List of offsets contained within this atom that will be dynamically bound /// by the dynamic loader and contain pointers to resolved (at load time) extern /// symbols (aka proxies aka imports) bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, @@ -56,20 +60,20 @@ lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, /// List of data-in-code entries. This is currently specific to x86_64 only. dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, -/// Stab entry for this block. This is currently specific to a binary created +/// Stab entry for this atom. This is currently specific to a binary created /// by linking object files in a traditional sense - in incremental sense, we /// bypass stabs altogether to produce dSYM bundle directly with fully relocated /// DWARF sections. stab: ?Stab = null, /// Points to the previous and next neighbours -next: ?*TextBlock, -prev: ?*TextBlock, +next: ?*Atom, +prev: ?*Atom, /// Previous/next linked list pointers. /// This is the linked list node for this Decl's corresponding .debug_info tag. -dbg_info_prev: ?*TextBlock, -dbg_info_next: ?*TextBlock, +dbg_info_prev: ?*Atom, +dbg_info_next: ?*Atom, /// Offset into .debug_info pointing to the tag for this Decl. dbg_info_off: u32, /// Size of the .debug_info tag for this Decl, not including padding. @@ -165,7 +169,7 @@ pub const Stab = union(enum) { }; pub const Relocation = struct { - /// Offset within the `block`s code buffer. + /// Offset within the atom's code buffer. /// Note relocation size can be inferred by relocation's kind. offset: u32, @@ -187,7 +191,7 @@ pub const Relocation = struct { }, const ResolveArgs = struct { - block: *TextBlock, + block: *Atom, offset: u32, source_addr: u64, target_addr: u64, @@ -572,7 +576,7 @@ pub const Relocation = struct { } }; -pub const empty = TextBlock{ +pub const empty = Atom{ .local_sym_index = 0, .size = 0, .alignment = 0, @@ -584,7 +588,7 @@ pub const empty = TextBlock{ .dbg_info_len = undefined, }; -pub fn deinit(self: *TextBlock, allocator: *Allocator) void { +pub fn deinit(self: *Atom, allocator: *Allocator) void { self.dices.deinit(allocator); self.lazy_bindings.deinit(allocator); self.bindings.deinit(allocator); @@ -598,20 +602,20 @@ pub fn deinit(self: *TextBlock, allocator: *Allocator) void { /// Returns how much room there is to grow in virtual address space. /// File offset relocation happens transparently, so it is not included in /// this calculation. -pub fn capacity(self: TextBlock, macho_file: MachO) u64 { +pub fn capacity(self: Atom, macho_file: MachO) u64 { const self_sym = macho_file.locals.items[self.local_sym_index]; if (self.next) |next| { const next_sym = macho_file.locals.items[next.local_sym_index]; return next_sym.n_value - self_sym.n_value; } else { - // We are the last block. + // We are the last atom. // The capacity is limited only by virtual address space. return std.math.maxInt(u64) - self_sym.n_value; } } -pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool { - // No need to keep a free list node for the last block. 
+pub fn freeListEligible(self: Atom, macho_file: MachO) bool { + // No need to keep a free list node for the last atom. const next = self.next orelse return false; const self_sym = macho_file.locals.items[self.local_sym_index]; const next_sym = macho_file.locals.items[next.local_sym_index]; @@ -696,7 +700,7 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc return parsed_rel; } -pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: RelocContext) !void { +pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocContext) !void { const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size); var it = RelocIterator{ .buffer = filtered_relocs, @@ -984,7 +988,7 @@ fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { } fn parseUnsigned( - self: TextBlock, + self: Atom, rel: macho.relocation_info, out: *Relocation, subtractor: ?u32, @@ -1018,7 +1022,7 @@ fn parseUnsigned( }; } -fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { +fn parseBranch(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { _ = self; assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -1030,7 +1034,7 @@ fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co }; } -fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { +fn parsePage(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void { _ = self; assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -1048,7 +1052,7 @@ fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, adde }; } -fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { +fn parsePageOff(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void { assert(rel.r_pcrel == 0); assert(rel.r_length == 2); @@ -1076,7 +1080,7 @@ fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, a }; } -fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { +fn parsePointerToGot(self: Atom, rel: macho.relocation_info, out: *Relocation) void { _ = self; assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -1086,7 +1090,7 @@ fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocati }; } -fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { +fn parseSigned(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -1114,7 +1118,7 @@ fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, co }; } -fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { +fn parseLoad(self: Atom, rel: macho.relocation_info, out: *Relocation) void { assert(rel.r_pcrel == 1); assert(rel.r_length == 2); @@ -1136,7 +1140,7 @@ fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void }; } -pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { +pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { for (self.relocs.items) |rel| { log.debug("relocating {}", .{rel}); @@ -1242,7 +1246,7 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { } } -pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: 
anytype) !void { +pub fn format(self: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = fmt; _ = options; try std.fmt.format(writer, "TextBlock {{ ", .{}); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 3bfd6c9f1a..cacf2721a7 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -16,9 +16,11 @@ const segmentName = commands.segmentName; const sectionName = commands.sectionName; const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); const LoadCommand = commands.LoadCommand; const MachO = @import("../MachO.zig"); -const TextBlock = @import("TextBlock.zig"); + +const TextBlock = Atom; file: fs.File, name: []const u8, From 8e5f7f5fe89e1c2979df2c735046c81e37c3f842 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 10 Sep 2021 15:37:51 +0200 Subject: [PATCH 69/78] macho: write adjacent atoms to in-memory buffer and then commit the large buffer into file. --- src/link/MachO.zig | 48 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 2705d47a85..63e716907f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -1766,20 +1766,60 @@ fn allocateGlobalSymbols(self: *MachO) !void { } fn writeAtoms(self: *MachO) !void { + var buffer = std.ArrayList(u8).init(self.base.allocator); + defer buffer.deinit(); + var file_offset: ?u64 = null; + var it = self.blocks.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; + const seg = self.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; var atom: *TextBlock = entry.value_ptr.*; + while (atom.prev) |prev| { + atom = prev; + } + while (true) { if (atom.dirty) { - try self.writeAtom(atom, match); + const atom_sym = self.locals.items[atom.local_sym_index]; + const padding_size: u64 = if (atom.next) |next| blk: { + const next_sym = self.locals.items[next.local_sym_index]; + break :blk next_sym.n_value - (atom_sym.n_value + atom.size); + } else 0; + + try atom.resolveRelocs(self); + try buffer.appendSlice(atom.code.items); + try buffer.ensureUnusedCapacity(padding_size); + + var i: usize = 0; + while (i < padding_size) : (i += 1) { + buffer.appendAssumeCapacity(0); + } + + if (file_offset == null) { + file_offset = sect.offset + atom_sym.n_value - sect.addr; + } atom.dirty = false; + } else { + if (file_offset) |off| { + try self.base.file.?.pwriteAll(buffer.items, off); + } + file_offset = null; + buffer.clearRetainingCapacity(); } - if (atom.prev) |prev| { - atom = prev; - } else break; + if (atom.next) |next| { + atom = next; + } else { + if (file_offset) |off| { + try self.base.file.?.pwriteAll(buffer.items, off); + } + file_offset = null; + buffer.clearRetainingCapacity(); + break; + } } } } From 6e0c3950b8115e1e274214447763733b3d3055d6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 10 Sep 2021 22:42:39 +0200 Subject: [PATCH 70/78] macho: rename blocks to atoms in Object.zig --- src/link/MachO.zig | 10 +-- src/link/MachO/Object.zig | 146 +++++++++++++++++++------------------- 2 files changed, 77 insertions(+), 79 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 63e716907f..56bc10c123 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2649,7 +2649,7 @@ fn parseTextBlocks(self: *MachO) !void { defer section_metadata.deinit(); for (self.objects.items) |*object, object_id| { - var atoms_in_objects = try object.parseTextBlocks(self.base.allocator, @intCast(u16, 
object_id), self); + var atoms_in_objects = try object.parseIntoAtoms(self.base.allocator, @intCast(u16, object_id), self); defer atoms_in_objects.deinit(); var it = atoms_in_objects.iterator(); @@ -4628,13 +4628,13 @@ fn writeSymbolTable(self: *MachO) !void { .n_value = object.mtime orelse 0, }); - for (object.text_blocks.items) |block| { - if (block.stab) |stab| { - const nlists = try stab.asNlists(block.local_sym_index, self); + for (object.atoms.items) |atom| { + if (atom.stab) |stab| { + const nlists = try stab.asNlists(atom.local_sym_index, self); defer self.base.allocator.free(nlists); try locals.appendSlice(nlists); } else { - for (block.contained.items) |sym_at_off| { + for (atom.contained.items) |sym_at_off| { const stab = sym_at_off.stab orelse continue; const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); defer self.base.allocator.free(nlists); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index cacf2721a7..0f68890e74 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -20,8 +20,6 @@ const Atom = @import("Atom.zig"); const LoadCommand = commands.LoadCommand; const MachO = @import("../MachO.zig"); -const TextBlock = Atom; - file: fs.File, name: []const u8, @@ -57,7 +55,7 @@ tu_name: ?[]const u8 = null, tu_comp_dir: ?[]const u8 = null, mtime: ?u64 = null, -text_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, +atoms: std.ArrayListUnmanaged(*Atom) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, // TODO symbol mapping and its inverse can probably be simple arrays @@ -137,7 +135,7 @@ pub fn deinit(self: *Object, allocator: *Allocator) void { self.data_in_code_entries.deinit(allocator); self.symtab.deinit(allocator); self.strtab.deinit(allocator); - self.text_blocks.deinit(allocator); + self.atoms.deinit(allocator); self.sections_as_symbols.deinit(allocator); self.symbol_mapping.deinit(allocator); self.reverse_symbol_mapping.deinit(allocator); @@ -322,14 +320,14 @@ const Context = struct { parsed_atoms: *ParsedAtoms, }; -const TextBlockParser = struct { +const AtomParser = struct { section: macho.section_64, code: []u8, relocs: []macho.relocation_info, nlists: []NlistWithIndex, index: u32 = 0, - fn peek(self: TextBlockParser) ?NlistWithIndex { + fn peek(self: AtomParser) ?NlistWithIndex { return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; } @@ -343,7 +341,7 @@ const TextBlockParser = struct { } } - pub fn next(self: *TextBlockParser, context: Context) !?*TextBlock { + pub fn next(self: *AtomParser, context: Context) !?*Atom { if (self.index == self.nlists.len) return null; var aliases = std.ArrayList(NlistWithIndex).init(context.allocator); @@ -368,12 +366,12 @@ const TextBlockParser = struct { } if (aliases.items.len > 1) { - // Bubble-up senior symbol as the main link to the text block. + // Bubble-up senior symbol as the main link to the atom. sort.sort( NlistWithIndex, aliases.items, context, - TextBlockParser.lessThanBySeniority, + AtomParser.lessThanBySeniority, ); } @@ -393,12 +391,12 @@ const TextBlockParser = struct { else max_align; - const stab: ?TextBlock.Stab = if (context.object.debug_info) |di| blk: { + const stab: ?Atom.Stab = if (context.object.debug_info) |di| blk: { // TODO there has to be a better to handle this. 
for (di.inner.func_list.items) |func| { if (func.pc_range) |range| { if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) { - break :blk TextBlock.Stab{ + break :blk Atom.Stab{ .function = range.end - range.start, }; } @@ -409,25 +407,25 @@ const TextBlockParser = struct { break :blk .static; } else null; - const block = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align); - block.stab = stab; + const atom = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align); + atom.stab = stab; const is_zerofill = blk: { const section_type = commands.sectionType(self.section); break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; }; if (!is_zerofill) { - mem.copy(u8, block.code.items, code); + mem.copy(u8, atom.code.items, code); } - try block.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); + try atom.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); for (aliases.items) |alias| { - block.aliases.appendAssumeCapacity(alias.index); + atom.aliases.appendAssumeCapacity(alias.index); const sym = &context.macho_file.locals.items[alias.index]; sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1); } - try block.parseRelocs(self.relocs, .{ + try atom.parseRelocs(self.relocs, .{ .base_addr = self.section.addr, .base_offset = start_addr, .allocator = context.allocator, @@ -442,10 +440,10 @@ const TextBlockParser = struct { senior_nlist.nlist.n_value, senior_nlist.nlist.n_value + size, ); - try block.dices.ensureTotalCapacity(context.allocator, dices.len); + try atom.dices.ensureTotalCapacity(context.allocator, dices.len); for (dices) |dice| { - block.dices.appendAssumeCapacity(.{ + atom.dices.appendAssumeCapacity(.{ .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value), .length = dice.length, .kind = dice.kind, @@ -455,13 +453,13 @@ const TextBlockParser = struct { self.index += 1; - return block; + return atom; } }; -pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *TextBlock); +pub const ParsedAtoms = std.AutoHashMap(MachO.MatchingSection, *Atom); -pub fn parseTextBlocks( +pub fn parseIntoAtoms( self: *Object, allocator: *Allocator, object_id: u16, @@ -508,7 +506,7 @@ pub fn parseTextBlocks( for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.debug("putting section '{s},{s}' as a TextBlock", .{ + log.debug("putting section '{s},{s}' as an Atom", .{ segmentName(sect), sectionName(sect), }); @@ -551,12 +549,12 @@ pub fn parseTextBlocks( macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; next: { - if (is_splittable) blocks: { - if (filtered_nlists.len == 0) break :blocks; + if (is_splittable) atoms: { + if (filtered_nlists.len == 0) break :atoms; // If the first nlist does not match the start of the section, // then we need to encapsulate the memory range [section start, first symbol) - // as a temporary symbol and insert the matching TextBlock. + // as a temporary symbol and insert the matching Atom. 
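            // (editor's note) Concretely, with invented numbers: if sect.addr is 0x1000
            // and the first named symbol sits at n_value 0x1010, the anonymous head atom
            // gets the bytes code[0..0x10] covering [0x1000, 0x1010), and the named
            // symbols are then carved into atoms from 0x1010 onwards by the parser below.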
const first_nlist = filtered_nlists[0].nlist; if (first_nlist.n_value > sect.addr) { const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{ @@ -566,8 +564,8 @@ pub fn parseTextBlocks( }); defer allocator.free(sym_name); - const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const block_local_sym_index = @intCast(u32, macho_file.locals.items.len); + const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len); try macho_file.locals.append(allocator, .{ .n_strx = try macho_file.makeString(sym_name), .n_type = macho.N_SECT, @@ -575,22 +573,22 @@ pub fn parseTextBlocks( .n_desc = 0, .n_value = 0, }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index); - break :blk block_local_sym_index; + try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index); + break :blk atom_local_sym_index; }; - const block_code = code[0 .. first_nlist.n_value - sect.addr]; - const block_size = block_code.len; - const block = try macho_file.createEmptyAtom(block_local_sym_index, block_size, sect.@"align"); + const atom_code = code[0 .. first_nlist.n_value - sect.addr]; + const atom_size = atom_code.len; + const atom = try macho_file.createEmptyAtom(atom_local_sym_index, atom_size, sect.@"align"); const is_zerofill = blk: { const section_type = commands.sectionType(sect); break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; }; if (!is_zerofill) { - mem.copy(u8, block.code.items, block_code); + mem.copy(u8, atom.code.items, atom_code); } - try block.parseRelocs(relocs, .{ + try atom.parseRelocs(relocs, .{ .base_addr = sect.addr, .base_offset = 0, .allocator = allocator, @@ -600,11 +598,11 @@ pub fn parseTextBlocks( }); if (macho_file.has_dices) { - const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + block_size); - try block.dices.ensureTotalCapacity(allocator, dices.len); + const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + atom_size); + try atom.dices.ensureTotalCapacity(allocator, dices.len); for (dices) |dice| { - block.dices.appendAssumeCapacity(.{ + atom.dices.appendAssumeCapacity(.{ .offset = dice.offset - try math.cast(u32, sect.addr), .length = dice.length, .kind = dice.kind, @@ -613,16 +611,16 @@ pub fn parseTextBlocks( } if (parsed_atoms.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; + last.*.next = atom; + atom.prev = last.*; + last.* = atom; } else { - try parsed_atoms.putNoClobber(match, block); + try parsed_atoms.putNoClobber(match, atom); } - try self.text_blocks.append(allocator, block); + try self.atoms.append(allocator, atom); } - var parser = TextBlockParser{ + var parser = AtomParser{ .section = sect, .code = code, .relocs = relocs, @@ -635,10 +633,10 @@ pub fn parseTextBlocks( .macho_file = macho_file, .match = match, .parsed_atoms = &parsed_atoms, - })) |block| { - const sym = macho_file.locals.items[block.local_sym_index]; + })) |atom| { + const sym = macho_file.locals.items[atom.local_sym_index]; const is_ext = blk: { - const orig_sym_id = self.reverse_symbol_mapping.get(block.local_sym_index) orelse unreachable; + const orig_sym_id = self.reverse_symbol_mapping.get(atom.local_sym_index) orelse unreachable; break :blk MachO.symbolIsExt(self.symtab.items[orig_sym_id]); }; if (is_ext) { @@ -662,26 +660,26 @@ pub fn parseTextBlocks( // In x86_64 relocs, it can so 
happen that the compiler refers to the same // atom by both the actual assigned symbol and the start of the section. In this // case, we need to link the two together so add an alias. - try block.aliases.append(allocator, alias); + try atom.aliases.append(allocator, alias); } } if (parsed_atoms.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; + last.*.next = atom; + atom.prev = last.*; + last.* = atom; } else { - try parsed_atoms.putNoClobber(match, block); + try parsed_atoms.putNoClobber(match, atom); } - try self.text_blocks.append(allocator, block); + try self.atoms.append(allocator, atom); } break :next; } - // Since there is no symbol to refer to this block, we create + // Since there is no symbol to refer to this atom, we create // a temp one, unless we already did that when working out the relocations - // of other text blocks. + // of other atoms. const sym_name = try std.fmt.allocPrint(allocator, "l_{s}_{s}_{s}", .{ self.name, segmentName(sect), @@ -689,8 +687,8 @@ pub fn parseTextBlocks( }); defer allocator.free(sym_name); - const block_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const block_local_sym_index = @intCast(u32, macho_file.locals.items.len); + const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len); try macho_file.locals.append(allocator, .{ .n_strx = try macho_file.makeString(sym_name), .n_type = macho.N_SECT, @@ -698,20 +696,20 @@ pub fn parseTextBlocks( .n_desc = 0, .n_value = 0, }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, block_local_sym_index); - break :blk block_local_sym_index; + try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index); + break :blk atom_local_sym_index; }; - const block = try macho_file.createEmptyAtom(block_local_sym_index, sect.size, sect.@"align"); + const atom = try macho_file.createEmptyAtom(atom_local_sym_index, sect.size, sect.@"align"); const is_zerofill = blk: { const section_type = commands.sectionType(sect); break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; }; if (!is_zerofill) { - mem.copy(u8, block.code.items, code); + mem.copy(u8, atom.code.items, code); } - try block.parseRelocs(relocs, .{ + try atom.parseRelocs(relocs, .{ .base_addr = sect.addr, .base_offset = 0, .allocator = allocator, @@ -722,10 +720,10 @@ pub fn parseTextBlocks( if (macho_file.has_dices) { const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); - try block.dices.ensureTotalCapacity(allocator, dices.len); + try atom.dices.ensureTotalCapacity(allocator, dices.len); for (dices) |dice| { - block.dices.appendAssumeCapacity(.{ + atom.dices.appendAssumeCapacity(.{ .offset = dice.offset - try math.cast(u32, sect.addr), .length = dice.length, .kind = dice.kind, @@ -733,12 +731,12 @@ pub fn parseTextBlocks( } } - // Since this is block gets a helper local temporary symbol that didn't exist + // Since this is atom gets a helper local temporary symbol that didn't exist // in the object file which encompasses the entire section, we need traverse // the filtered symbols and note which symbol is contained within so that // we can properly allocate addresses down the line. // While we're at it, we need to update segment,section mapping of each symbol too. 
- try block.contained.ensureTotalCapacity(allocator, filtered_nlists.len); + try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len); for (filtered_nlists) |nlist_with_index| { const nlist = nlist_with_index.nlist; @@ -746,12 +744,12 @@ pub fn parseTextBlocks( const local = &macho_file.locals.items[local_sym_index]; local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1); - const stab: ?TextBlock.Stab = if (self.debug_info) |di| blk: { + const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { // TODO there has to be a better to handle this. for (di.inner.func_list.items) |func| { if (func.pc_range) |range| { if (nlist.n_value >= range.start and nlist.n_value < range.end) { - break :blk TextBlock.Stab{ + break :blk Atom.Stab{ .function = range.end - range.start, }; } @@ -762,7 +760,7 @@ pub fn parseTextBlocks( break :blk .static; } else null; - block.contained.appendAssumeCapacity(.{ + atom.contained.appendAssumeCapacity(.{ .local_sym_index = local_sym_index, .offset = nlist.n_value - sect.addr, .stab = stab, @@ -770,13 +768,13 @@ pub fn parseTextBlocks( } if (parsed_atoms.getPtr(match)) |last| { - last.*.next = block; - block.prev = last.*; - last.* = block; + last.*.next = atom; + atom.prev = last.*; + last.* = atom; } else { - try parsed_atoms.putNoClobber(match, block); + try parsed_atoms.putNoClobber(match, atom); } - try self.text_blocks.append(allocator, block); + try self.atoms.append(allocator, atom); } } From 31dcb0dde71e7c6968a9ee902782783b139c4900 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 11 Sep 2021 01:18:37 +0200 Subject: [PATCH 71/78] macho: change all occurrences of TextBlock into Atom and unify allocateAtom with allocateTextBlock. --- src/link/MachO.zig | 510 +++++++++++++++++++++------------------------ 1 file changed, 238 insertions(+), 272 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 56bc10c123..4436391979 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -158,8 +158,8 @@ stub_preamble_sym_index: ?u32 = null, strtab: std.ArrayListUnmanaged(u8) = .{}, strtab_dir: std.HashMapUnmanaged(u32, u32, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, -got_entries_map: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, *TextBlock) = .{}, -stubs_map: std.AutoArrayHashMapUnmanaged(u32, *TextBlock) = .{}, +got_entries_map: std.AutoArrayHashMapUnmanaged(GotIndirectionKey, *Atom) = .{}, +stubs_map: std.AutoArrayHashMapUnmanaged(u32, *Atom) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, @@ -171,12 +171,12 @@ has_stabs: bool = false, section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, -/// A list of text blocks that have surplus capacity. This list can have false +/// A list of atoms that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added /// or removed from the freelist. /// -/// A text block has surplus capacity when its overcapacity value is greater than -/// padToIdeal(minimum_text_block_size). That is, when it has so +/// An atom has surplus capacity when its overcapacity value is greater than +/// padToIdeal(minimum_atom_size). That is, when it has so /// much extra capacity, that we could fit a small new symbol in it, itself with /// ideal_capacity or more. /// @@ -184,23 +184,23 @@ section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, /// /// Overcapacity is measured by actual_capacity - ideal_capacity. 
Note that /// overcapacity can be negative. A simple way to have negative overcapacity is to -/// allocate a fresh text block, which will have ideal capacity, and then grow it +/// allocate a fresh atom, which will have ideal capacity, and then grow it /// by 1 byte. It will then have -1 overcapacity. -block_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*TextBlock)) = .{}, +atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*Atom)) = .{}, -/// Pointer to the last allocated text block -blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, +/// Pointer to the last allocated atom +atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{}, -/// List of TextBlocks that are owned directly by the linker. -/// Currently these are only TextBlocks that are the result of linking -/// object files. TextBlock which take part in incremental linking are +/// List of atoms that are owned directly by the linker. +/// Currently these are only atoms that are the result of linking +/// object files. Atoms which take part in incremental linking are /// at present owned by Module.Decl. /// TODO consolidate this. -managed_blocks: std.ArrayListUnmanaged(*TextBlock) = .{}, +managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, /// Table of Decls that are currently alive. /// We store them here so that we can properly dispose of any allocated -/// memory within the TextBlock in the incremental linker. +/// memory within the atom in the incremental linker. /// TODO consolidate this. decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, void) = .{}, @@ -768,31 +768,8 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.addDataInCodeLC(); try self.addCodeSignatureLC(); - try self.parseTextBlocks(); + try self.parseObjectsIntoAtoms(); try self.allocateGlobalSymbols(); - { - log.debug("locals:", .{}); - for (self.locals.items) |sym| { - log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); - } - log.debug("globals:", .{}); - for (self.globals.items) |sym| { - log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); - } - log.debug("undefs:", .{}); - for (self.undefs.items) |sym| { - log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym }); - } - log.debug("unresolved:", .{}); - for (self.unresolved.keys()) |key| { - log.debug(" {d} => {s}", .{ key, self.unresolved.get(key).? 
}); - } - log.debug("resolved:", .{}); - var it = self.symbol_resolver.iterator(); - while (it.next()) |entry| { - log.debug(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); - } - } try self.writeAtoms(); if (self.bss_section_index) |idx| { @@ -1637,87 +1614,24 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio return res; } -pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*TextBlock { +pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*Atom { const code = try self.base.allocator.alloc(u8, size); defer self.base.allocator.free(code); mem.set(u8, code, 0); - const atom = try self.base.allocator.create(TextBlock); + const atom = try self.base.allocator.create(Atom); errdefer self.base.allocator.destroy(atom); - atom.* = TextBlock.empty; + atom.* = Atom.empty; atom.local_sym_index = local_sym_index; atom.size = size; atom.alignment = alignment; try atom.code.appendSlice(self.base.allocator, code); - try self.managed_blocks.append(self.base.allocator, atom); + try self.managed_atoms.append(self.base.allocator, atom); return atom; } -pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 { - const seg = &self.load_commands.items[match.seg].Segment; - const sect = &seg.sections.items[match.sect]; - - const sym = &self.locals.items[atom.local_sym_index]; - const needs_padding = match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; - - var atom_placement: ?*TextBlock = null; - const atom_alignment = try math.powi(u32, 2, atom.alignment); - - // TODO converge with `allocateTextBlock` and handle free list - var vaddr = if (self.blocks.get(match)) |last| blk: { - const last_atom_sym = self.locals.items[last.local_sym_index]; - const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; - const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity; - const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, atom_alignment); - atom_placement = last; - break :blk new_start_vaddr; - } else mem.alignForwardGeneric(u64, sect.addr, atom_alignment); - - // TODO what if the section which was preallocated is not aligned to the maximum (section) alignment? - // Should we move the section? - - log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - - const expand_section = atom_placement == null or atom_placement.?.next == null; - if (expand_section) { - const needed_size = @intCast(u32, (vaddr + atom.size) - sect.addr); - try self.growSection(match, needed_size); - sect.size = needed_size; - self.load_commands_dirty = true; - } - sect.@"align" = math.max(sect.@"align", atom.alignment); - - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - sym.n_value = vaddr; - sym.n_sect = n_sect; - - // Update each alias (if any) - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = vaddr; - alias_sym.n_sect = n_sect; - } - - // Update each symbol contained within the TextBlock - for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = vaddr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - if (self.blocks.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try self.blocks.putNoClobber(self.base.allocator, match, atom); - } - - return vaddr; -} - -pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { +pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; const sym = self.locals.items[atom.local_sym_index]; @@ -1728,7 +1642,7 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void { } fn allocateLocalSymbols(self: *MachO, match: MatchingSection, offset: i64) !void { - var atom = self.blocks.get(match) orelse return; + var atom = self.atoms.get(match) orelse return; while (true) { const atom_sym = &self.locals.items[atom.local_sym_index]; @@ -1751,8 +1665,6 @@ fn allocateLocalSymbols(self: *MachO, match: MatchingSection, offset: i64) !void } fn allocateGlobalSymbols(self: *MachO) !void { - // TODO should we do this in `allocateAtom` (or similar)? Then, we would need to - // store the link atom -> globals somewhere. var sym_it = self.symbol_resolver.valueIterator(); while (sym_it.next()) |resolv| { if (resolv.where != .global) continue; @@ -1770,12 +1682,14 @@ fn writeAtoms(self: *MachO) !void { defer buffer.deinit(); var file_offset: ?u64 = null; - var it = self.blocks.iterator(); + var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; const seg = self.load_commands.items[match.seg].Segment; const sect = seg.sections.items[match.sect]; - var atom: *TextBlock = entry.value_ptr.*; + var atom: *Atom = entry.value_ptr.*; + + log.debug("writing atoms in {s},{s}", .{ commands.segmentName(sect), commands.sectionName(sect) }); while (atom.prev) |prev| { atom = prev; @@ -1789,6 +1703,8 @@ fn writeAtoms(self: *MachO) !void { break :blk next_sym.n_value - (atom_sym.n_value + atom.size); } else 0; + log.debug(" (adding atom {s} to buffer: {})", .{ self.getString(atom_sym.n_strx), atom_sym }); + try atom.resolveRelocs(self); try buffer.appendSlice(atom.code.items); try buffer.ensureUnusedCapacity(padding_size); @@ -1824,7 +1740,7 @@ fn writeAtoms(self: *MachO) !void { } } -pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*TextBlock { +pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*Atom { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ .n_strx = try self.makeString("l_zld_got_entry"), @@ -1860,7 +1776,7 @@ pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*TextBlock { return atom; } -fn createDyldPrivateAtom(self: *MachO) !*TextBlock { +fn createDyldPrivateAtom(self: *MachO) !*Atom { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ .n_strx = try self.makeString("l_zld_dyld_private"), @@ -1873,7 +1789,7 @@ fn createDyldPrivateAtom(self: *MachO) !*TextBlock { return 
self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); } -fn createStubHelperPreambleAtom(self: *MachO) !*TextBlock { +fn createStubHelperPreambleAtom(self: *MachO) !*Atom { const arch = self.base.options.target.cpu.arch; const size: u64 = switch (arch) { .x86_64 => 15, @@ -2006,7 +1922,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !*TextBlock { return atom; } -pub fn createStubHelperAtom(self: *MachO) !*TextBlock { +pub fn createStubHelperAtom(self: *MachO) !*Atom { const arch = self.base.options.target.cpu.arch; const stub_size: u4 = switch (arch) { .x86_64 => 10, @@ -2072,7 +1988,7 @@ pub fn createStubHelperAtom(self: *MachO) !*TextBlock { return atom; } -pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym_index: u32) !*TextBlock { +pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym_index: u32) !*Atom { const local_sym_index = @intCast(u32, self.locals.items.len); try self.locals.append(self.base.allocator, .{ .n_strx = try self.makeString("l_zld_lazy_ptr"), @@ -2102,7 +2018,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, lazy_binding_sym return atom; } -pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*TextBlock { +pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { const arch = self.base.options.target.cpu.arch; const alignment: u2 = switch (arch) { .x86_64 => 0, @@ -2273,14 +2189,6 @@ fn resolveSymbolsInObject( continue; }, .undef => { - // const undef = &self.undefs.items[resolv.where_index]; - // undef.* = .{ - // .n_strx = 0, - // .n_type = macho.N_UNDF, - // .n_sect = 0, - // .n_desc = 0, - // .n_value = 0, - // }; _ = self.unresolved.fetchSwapRemove(resolv.where_index); }, } @@ -2437,23 +2345,36 @@ fn resolveSymbols(self: *MachO) !void { resolv.local_sym_index = local_sym_index; const atom = try self.createEmptyAtom(local_sym_index, size, alignment); - _ = try self.allocateAtom(atom, match); + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const vaddr = try self.allocateAtom(atom, size, alignment_pow_2, match); + sym.n_value = vaddr; } try self.resolveDyldStubBinder(); { - const atom = try self.createDyldPrivateAtom(); - _ = try self.allocateAtom(atom, .{ + const match = MatchingSection{ .seg = self.data_segment_cmd_index.?, .sect = self.data_section_index.?, - }); + }; + const atom = try self.createDyldPrivateAtom(); + const sym = &self.locals.items[atom.local_sym_index]; + const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } { - const atom = try self.createStubHelperPreambleAtom(); - _ = try self.allocateAtom(atom, .{ + const match = MatchingSection{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, - }); + }; + const atom = try self.createStubHelperPreambleAtom(); + const sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } // Third pass, resolve symbols in dynamic libraries. 
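// (editor's note) With the symbol fix-ups moved out of allocateAtom, every call site
// in this patch now repeats the same allocate-then-patch sequence. A hypothetical
// wrapper, sketched from the call sites above and not part of the patch:
//
//     fn allocateAtomAndSymbol(self: *MachO, atom: *Atom, size: u64, alignment: u64, match: MatchingSection) !u64 {
//         const vaddr = try self.allocateAtom(atom, size, alignment, match);
//         const sym = &self.locals.items[atom.local_sym_index];
//         sym.n_value = vaddr;
//         sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1);
//         return vaddr;
//     }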
@@ -2483,30 +2404,45 @@ fn resolveSymbols(self: *MachO) !void { .stub => { if (self.stubs_map.contains(resolv.where_index)) break :outer_blk; const stub_helper_atom = blk: { - const atom = try self.createStubHelperAtom(); - _ = try self.allocateAtom(atom, .{ + const match = MatchingSection{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, - }); + }; + const atom = try self.createStubHelperAtom(); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); break :blk atom; }; const laptr_atom = blk: { + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, + }; const atom = try self.createLazyPointerAtom( stub_helper_atom.local_sym_index, resolv.where_index, ); - _ = try self.allocateAtom(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); break :blk atom; }; const stub_atom = blk: { - const atom = try self.createStubAtom(laptr_atom.local_sym_index); - _ = try self.allocateAtom(atom, .{ + const match = MatchingSection{ .seg = self.text_segment_cmd_index.?, .sect = self.stubs_section_index.?, - }); + }; + const atom = try self.createStubAtom(laptr_atom.local_sym_index); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); break :blk atom; }; try self.stubs_map.putNoClobber(self.base.allocator, resolv.where_index, stub_atom); @@ -2565,7 +2501,10 @@ fn resolveSymbols(self: *MachO) !void { // TODO perhaps we should special-case special symbols? Create a separate // linked list of atoms? const atom = try self.createEmptyAtom(local_sym_index, 0, 0); - _ = try self.allocateAtom(atom, match); + const sym = &self.locals.items[local_sym_index]; + const vaddr = try self.allocateAtom(atom, 0, 1, match); + sym.n_value = vaddr; + atom.dirty = false; // We don't really want to write it to file. } for (self.unresolved.keys()) |index| { @@ -2632,10 +2571,14 @@ fn resolveDyldStubBinder(self: *MachO) !void { .seg = self.data_const_segment_cmd_index.?, .sect = self.got_section_index.?, }; - _ = try self.allocateAtom(atom, match); + const atom_sym = &self.locals.items[atom.local_sym_index]; + const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); + atom_sym.n_value = vaddr; + atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } -fn parseTextBlocks(self: *MachO) !void { +fn parseObjectsIntoAtoms(self: *MachO) !void { var parsed_atoms = Object.ParsedAtoms.init(self.base.allocator); defer parsed_atoms.deinit(); @@ -2710,7 +2653,7 @@ fn parseTextBlocks(self: *MachO) !void { metadata.alignment, }); - const sect_size = if (self.blocks.get(match)) |last| blk: { + const sect_size = if (self.atoms.get(match)) |last| blk: { const last_atom_sym = self.locals.items[last.local_sym_index]; break :blk last_atom_sym.n_value + last.size - sect.addr; } else 0; @@ -2720,7 +2663,7 @@ fn parseTextBlocks(self: *MachO) !void { try self.growSection(match, needed_size); sect.size = needed_size; - var base_vaddr = if (self.blocks.get(match)) |last| blk: { + var base_vaddr = if (self.atoms.get(match)) |last| blk: { const last_atom_sym = self.locals.items[last.local_sym_index]; break :blk last_atom_sym.n_value + last.size; } else sect.addr; @@ -2750,7 +2693,7 @@ fn parseTextBlocks(self: *MachO) !void { alias_sym.n_sect = n_sect; } - // Update each symbol contained within the TextBlock + // Update each symbol contained within the atom for (atom.contained.items) |sym_at_off| { const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; contained_sym.n_value = base_vaddr + sym_at_off.offset; @@ -2764,13 +2707,13 @@ fn parseTextBlocks(self: *MachO) !void { } else break; } - if (self.blocks.getPtr(match)) |last| { + if (self.atoms.getPtr(match)) |last| { const first_atom = first_atoms.get(match).?; last.*.next = first_atom; first_atom.prev = last.*; last.* = first_atom; } - _ = try self.blocks.put(self.base.allocator, match, parsed_atoms.get(match).?); + _ = try self.atoms.put(self.base.allocator, match, parsed_atoms.get(match).?); } } @@ -2905,18 +2848,18 @@ pub fn deinit(self: *MachO) void { } self.load_commands.deinit(self.base.allocator); - for (self.managed_blocks.items) |block| { - block.deinit(self.base.allocator); - self.base.allocator.destroy(block); + for (self.managed_atoms.items) |atom| { + atom.deinit(self.base.allocator); + self.base.allocator.destroy(atom); } - self.managed_blocks.deinit(self.base.allocator); - self.blocks.deinit(self.base.allocator); + self.managed_atoms.deinit(self.base.allocator); + self.atoms.deinit(self.base.allocator); { - var it = self.block_free_lists.valueIterator(); + var it = self.atom_free_lists.valueIterator(); while (it.next()) |free_list| { free_list.deinit(self.base.allocator); } - self.block_free_lists.deinit(self.base.allocator); + self.atom_free_lists.deinit(self.base.allocator); } for (self.decls.keys()) |decl| { decl.link.macho.deinit(self.base.allocator); @@ -2936,25 +2879,21 @@ pub fn closeFiles(self: MachO) void { } } -fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { - log.debug("freeTextBlock {*}", .{text_block}); - text_block.deinit(self.base.allocator); +fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection) void { + log.debug("freeAtom {*}", .{atom}); + atom.deinit(self.base.allocator); - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - const text_block_free_list = self.block_free_lists.getPtr(match).?; + const free_list = self.atom_free_lists.getPtr(match).?; var already_have_free_list_node = false; { var i: usize = 0; - // TODO turn text_block_free_list into a hash map - while (i < text_block_free_list.items.len) { - if (text_block_free_list.items[i] == text_block) { - _ = 
text_block_free_list.swapRemove(i); + // TODO turn free_list into a hash map + while (i < free_list.items.len) { + if (free_list.items[i] == atom) { + _ = free_list.swapRemove(i); continue; } - if (text_block_free_list.items[i] == text_block.prev) { + if (free_list.items[i] == atom.prev) { already_have_free_list_node = true; } i += 1; @@ -2962,72 +2901,73 @@ fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { } // TODO process free list for dbg info just like we do above for vaddrs - if (self.blocks.getPtr(match)) |last_text_block| { - if (last_text_block.* == text_block) { - if (text_block.prev) |prev| { - // TODO shrink the __text section size here - last_text_block.* = prev; + if (self.atoms.getPtr(match)) |last_atom| { + if (last_atom.* == atom) { + if (atom.prev) |prev| { + // TODO shrink the section size here + last_atom.* = prev; } } } if (self.d_sym) |*ds| { - if (ds.dbg_info_decl_first == text_block) { - ds.dbg_info_decl_first = text_block.dbg_info_next; + if (ds.dbg_info_decl_first == atom) { + ds.dbg_info_decl_first = atom.dbg_info_next; } - if (ds.dbg_info_decl_last == text_block) { + if (ds.dbg_info_decl_last == atom) { // TODO shrink the .debug_info section size here - ds.dbg_info_decl_last = text_block.dbg_info_prev; + ds.dbg_info_decl_last = atom.dbg_info_prev; } } - if (text_block.prev) |prev| { - prev.next = text_block.next; + if (atom.prev) |prev| { + prev.next = atom.next; if (!already_have_free_list_node and prev.freeListEligible(self.*)) { // The free list is heuristics, it doesn't have to be perfect, so we can ignore // the OOM here. - text_block_free_list.append(self.base.allocator, prev) catch {}; + free_list.append(self.base.allocator, prev) catch {}; } } else { - text_block.prev = null; + atom.prev = null; } - if (text_block.next) |next| { - next.prev = text_block.prev; + if (atom.next) |next| { + next.prev = atom.prev; } else { - text_block.next = null; + atom.next = null; } - if (text_block.dbg_info_prev) |prev| { - prev.dbg_info_next = text_block.dbg_info_next; + if (atom.dbg_info_prev) |prev| { + prev.dbg_info_next = atom.dbg_info_next; - // TODO the free list logic like we do for text blocks above + // TODO the free list logic like we do for atoms above } else { - text_block.dbg_info_prev = null; + atom.dbg_info_prev = null; } - if (text_block.dbg_info_next) |next| { - next.dbg_info_prev = text_block.dbg_info_prev; + if (atom.dbg_info_next) |next| { + next.dbg_info_prev = atom.dbg_info_prev; } else { - text_block.dbg_info_next = null; + atom.dbg_info_next = null; } } -fn shrinkTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64) void { +fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSection) void { _ = self; - _ = text_block; + _ = atom; _ = new_block_size; + _ = match; // TODO check the new capacity, and if it crosses the size threshold into a big enough // capacity, insert a free list node for it. 
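    // (editor's note) A hypothetical sketch of the TODO above, reusing the
    // freeListEligible heuristic from Atom.zig; not part of the patch:
    //
    //     if (atom.freeListEligible(self.*)) {
    //         const free_list = self.atom_free_lists.getPtr(match).?;
    //         // The free list is a heuristic, so an OOM here is safe to ignore.
    //         free_list.append(self.base.allocator, atom) catch {};
    //     }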
} -fn growTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { - const sym = self.locals.items[text_block.local_sym_index]; +fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { + const sym = self.locals.items[atom.local_sym_index]; const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; - const need_realloc = !align_ok or new_block_size > text_block.capacity(self.*); + const need_realloc = !align_ok or new_atom_size > atom.capacity(self.*); if (!need_realloc) return sym.n_value; - return self.allocateTextBlock(text_block, new_block_size, alignment); + return self.allocateAtom(atom, new_atom_size, alignment, match); } pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { @@ -3112,7 +3052,7 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv // in a different, smarter, more automatic way somewhere else, in a more centralised // way than this. // If we don't clear the buffers here, we are up for some nasty surprises when - // this TextBlock is reused later on and was not freed by freeTextBlock(). + // this atom is reused later on and was not freed by freeAtom(). decl.link.macho.code.clearAndFree(self.base.allocator); try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); }, @@ -3202,7 +3142,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { // in a different, smarter, more automatic way somewhere else, in a more centralised // way than this. // If we don't clear the buffers here, we are up for some nasty surprises when - // this TextBlock is reused later on and was not freed by freeTextBlock(). + // this atom is reused later on and was not freed by freeAtom(). decl.link.macho.code.clearAndFree(self.base.allocator); try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items); break :blk decl.link.macho.code.items; @@ -3231,7 +3171,10 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 const capacity = decl.link.macho.capacity(self.*); const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); if (need_realloc) { - const vaddr = try self.growTextBlock(&decl.link.macho, code_len, required_alignment); + const vaddr = try self.growAtom(&decl.link.macho, code_len, required_alignment, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ decl.name, symbol.n_value, vaddr }); @@ -3241,15 +3184,24 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 .where = .local, .where_index = decl.link.macho.local_sym_index, }) orelse unreachable; - _ = try self.allocateAtom(got_atom, .{ + const got_sym = &self.locals.items[got_atom.local_sym_index]; + const got_vaddr = try self.allocateAtom(got_atom, @sizeOf(u64), 8, .{ .seg = self.data_const_segment_cmd_index.?, .sect = self.got_section_index.?, }); + got_sym.n_value = got_vaddr; + got_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }).? 
+ 1); } symbol.n_value = vaddr; } else if (code_len < decl.link.macho.size) { - self.shrinkTextBlock(&decl.link.macho, code_len); + self.shrinkAtom(&decl.link.macho, code_len, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); } decl.link.macho.size = code_len; @@ -3265,11 +3217,17 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 defer self.base.allocator.free(decl_name); const name_str_index = try self.makeString(decl_name); - const addr = try self.allocateTextBlock(&decl.link.macho, code_len, required_alignment); + const addr = try self.allocateAtom(&decl.link.macho, code_len, required_alignment, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); - log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, addr }); + log.debug("allocated atom for {s} at 0x{x}", .{ decl_name, addr }); - errdefer self.freeTextBlock(&decl.link.macho); + errdefer self.freeAtom(&decl.link.macho, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); symbol.* = .{ .n_strx = name_str_index, @@ -3282,10 +3240,16 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64 .where = .local, .where_index = decl.link.macho.local_sym_index, }) orelse unreachable; - _ = try self.allocateAtom(got_atom, .{ + const got_sym = &self.locals.items[got_atom.local_sym_index]; + const vaddr = try self.allocateAtom(got_atom, @sizeOf(u64), 8, .{ .seg = self.data_const_segment_cmd_index.?, .sect = self.got_section_index.?, }); + got_sym.n_value = vaddr; + got_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }).? + 1); } return symbol; @@ -3402,7 +3366,10 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { log.debug("freeDecl {*}", .{decl}); _ = self.decls.swapRemove(decl); // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. - self.freeTextBlock(&decl.link.macho); + self.freeAtom(&decl.link.macho, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }); if (decl.link.macho.local_sym_index != 0) { self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; @@ -3412,7 +3379,7 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { decl.link.macho.local_sym_index = 0; } if (self.d_sym) |*ds| { - // TODO make this logic match freeTextBlock. Maybe abstract the logic + // TODO make this logic match freeAtom. Maybe abstract the logic // out since the same thing is desired for both. 
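A detail repeated in the hunks above: whenever an atom is (re)allocated, its symbol's n_sect is set from section_ordinals.getIndex(match).? + 1. Mach-O's nlist_64.n_sect is a 1-based section ordinal counted across all segments, with 0 reserved for NO_SECT, so the 0-based array-hash-map index needs the + 1. A self-contained sketch of that ordinal lookup, with MatchingSection reduced to a bare key struct:

    const std = @import("std");

    const MatchingSection = struct { seg: u16, sect: u16 };

    pub fn main() !void {
        var gpa = std.heap.GeneralPurposeAllocator(.{}){};
        var ordinals = std.AutoArrayHashMap(MatchingSection, void).init(&gpa.allocator);
        defer ordinals.deinit();

        // Insertion order fixes each section's ordinal across all segments.
        try ordinals.put(.{ .seg = 0, .sect = 0 }, {}); // e.g. __TEXT,__text
        try ordinals.put(.{ .seg = 1, .sect = 2 }, {}); // e.g. __DATA_CONST,__got

        // nlist_64.n_sect is 1-based; ordinal 0 is NO_SECT, hence the + 1.
        const match = MatchingSection{ .seg = 1, .sect = 2 };
        const n_sect = @intCast(u8, ordinals.getIndex(match).? + 1);
        std.debug.assert(n_sect == 2);
    }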
_ = ds.dbg_line_fn_free_list.remove(&decl.fn_link.macho); if (decl.fn_link.macho.prev) |prev| { @@ -3947,7 +3914,7 @@ fn allocateSection( .sect = index, }; _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); + try self.atom_free_lists.putNoClobber(self.base.allocator, match, .{}); self.load_commands_dirty = true; self.sections_order_dirty = true; @@ -4113,106 +4080,105 @@ fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u3 return max_alignment; } -fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, alignment: u64) !u64 { - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = &text_segment.sections.items[self.text_section_index.?]; - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - var text_block_free_list = self.block_free_lists.get(match).?; - const new_block_ideal_capacity = padToIdeal(new_block_size); +fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { + const seg = &self.load_commands.items[match.seg].Segment; + const sect = &seg.sections.items[match.sect]; + var free_list = self.atom_free_lists.get(match).?; + const needs_padding = match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; + const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; - // We use these to indicate our intention to update metadata, placing the new block, + // We use these to indicate our intention to update metadata, placing the new atom, // and possibly removing a free list node. // It would be simpler to do it inside the for loop below, but that would cause a // problem if an error was returned later in the function. So this action // is actually carried out at the end of the function, when errors are no longer possible. - var block_placement: ?*TextBlock = null; + var atom_placement: ?*Atom = null; var free_list_removal: ?usize = null; // First we look for an appropriately sized free list node. // The list is unordered. We'll just take the first thing that works. var vaddr = blk: { var i: usize = 0; - while (i < text_block_free_list.items.len) { - const big_block = text_block_free_list.items[i]; - // We now have a pointer to a live text block that has too much capacity. - // Is it enough that we could fit this new text block? - const sym = self.locals.items[big_block.local_sym_index]; - const capacity = big_block.capacity(self.*); - const ideal_capacity = padToIdeal(capacity); + while (i < free_list.items.len) { + const big_atom = free_list.items[i]; + // We now have a pointer to a live atom that has too much capacity. + // Is it enough that we could fit this new atom? 
+ const sym = self.locals.items[big_atom.local_sym_index]; + const capacity = big_atom.capacity(self.*); + const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = sym.n_value + ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; - const new_start_vaddr_unaligned = capacity_end_vaddr - new_block_ideal_capacity; + const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; const new_start_vaddr = mem.alignBackwardGeneric(u64, new_start_vaddr_unaligned, alignment); if (new_start_vaddr < ideal_capacity_end_vaddr) { // Additional bookkeeping here to notice if this free list node - // should be deleted because the block that it points to has grown to take up + // should be deleted because the atom that it points to has grown to take up // more of the extra capacity. - if (!big_block.freeListEligible(self.*)) { - const bl = text_block_free_list.swapRemove(i); + if (!big_atom.freeListEligible(self.*)) { + const bl = free_list.swapRemove(i); bl.deinit(self.base.allocator); } else { i += 1; } continue; } - // At this point we know that we will place the new block here. But the + // At this point we know that we will place the new atom here. But the // remaining question is whether there is still yet enough capacity left // over for there to still be a free list node. const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; const keep_free_list_node = remaining_capacity >= min_text_capacity; // Set up the metadata to be updated, after errors are no longer possible. - block_placement = big_block; + atom_placement = big_atom; if (!keep_free_list_node) { free_list_removal = i; } break :blk new_start_vaddr; - } else if (self.blocks.get(match)) |last| { + } else if (self.atoms.get(match)) |last| { const last_symbol = self.locals.items[last.local_sym_index]; - // TODO We should pad out the excess capacity with NOPs. For executables, - // no padding seems to be OK, but it will probably not be for objects. 
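The free-list scan above places a new atom at the top of a bigger atom's surplus: take the candidate's capacity end, subtract the new atom's ideal capacity, align the result downward, and accept the spot only if it clears the room the candidate itself may still grow into. Note also the new needs_padding switch: only __TEXT,__text pads capacities with padToIdeal; every other section now packs atoms exactly. A sketch of the placement test; the padding policy is a stand-in, and the growth room here is reserved from the candidate's size, a looser variant than the hunk's capacity-based test:

    const std = @import("std");
    const mem = std.mem;

    /// Stand-in surplus policy; MachO.zig defines its own padToIdeal.
    fn padToIdeal(size: u64) u64 {
        return size + size / 4;
    }

    /// Try to place a new atom of `new_ideal` bytes (already padded by the
    /// caller's policy) at the top of a free-list candidate that sits at
    /// `vaddr`, is `size` bytes long, and owns `capacity` bytes of room.
    fn placeAtopSurplus(vaddr: u64, size: u64, capacity: u64, new_ideal: u64, alignment: u64) ?u64 {
        const reserved_end = vaddr + padToIdeal(size); // room the candidate may grow into
        const capacity_end = vaddr + capacity;
        const new_start = mem.alignBackwardGeneric(u64, capacity_end - new_ideal, alignment);
        if (new_start < reserved_end) return null; // would collide with the growth room
        return new_start;
    }

    pub fn main() void {
        // Candidate at 0x1000: 0x100 bytes of code, 0x400 bytes of capacity.
        // A padded 0x80-byte atom lands 16-byte aligned at the top of the surplus.
        const spot = placeAtopSurplus(0x1000, 0x100, 0x400, 0x80, 16).?;
        std.debug.assert(spot == 0x1380);
    }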
- const ideal_capacity = padToIdeal(last.size); + const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); - block_placement = last; + atom_placement = last; break :blk new_start_vaddr; } else { - break :blk text_section.addr; + break :blk mem.alignForwardGeneric(u64, sect.addr, alignment); } }; - const expand_text_section = block_placement == null or block_placement.?.next == null; - if (expand_text_section) { - const needed_size = @intCast(u32, (vaddr + new_block_size) - text_section.addr); + const expand_section = atom_placement == null or atom_placement.?.next == null; + if (expand_section) { + const needed_size = @intCast(u32, (vaddr + new_atom_size) - sect.addr); try self.growSection(match, needed_size); - _ = try self.blocks.put(self.base.allocator, match, text_block); - text_section.size = needed_size; + _ = try self.atoms.put(self.base.allocator, match, atom); + sect.size = needed_size; self.load_commands_dirty = true; } const align_pow = @intCast(u32, math.log2(alignment)); - text_section.@"align" = math.max(text_section.@"align", align_pow); - text_block.size = new_block_size; - - if (text_block.prev) |prev| { - prev.next = text_block.next; + if (sect.@"align" < align_pow) { + sect.@"align" = align_pow; + self.load_commands_dirty = true; } - if (text_block.next) |next| { - next.prev = text_block.prev; + atom.size = new_atom_size; + atom.alignment = align_pow; + + if (atom.prev) |prev| { + prev.next = atom.next; + } + if (atom.next) |next| { + next.prev = atom.prev; } - if (block_placement) |big_block| { - text_block.prev = big_block; - text_block.next = big_block.next; - big_block.next = text_block; + if (atom_placement) |big_atom| { + atom.prev = big_atom; + atom.next = big_atom.next; + big_atom.next = atom; } else { - text_block.prev = null; - text_block.next = null; + atom.prev = null; + atom.next = null; } if (free_list_removal) |i| { - _ = text_block_free_list.swapRemove(i); + _ = free_list.swapRemove(i); } return vaddr; @@ -4319,10 +4285,10 @@ fn writeDyldInfoData(self: *MachO) !void { defer lazy_bind_pointers.deinit(); { - var it = self.blocks.iterator(); + var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; - var atom: *TextBlock = entry.value_ptr.*; + var atom: *Atom = entry.value_ptr.*; if (match.seg == self.text_segment_cmd_index.?) 
continue; // __TEXT is non-writable @@ -4444,7 +4410,7 @@ fn writeDyldInfoData(self: *MachO) !void { } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { - const last_atom = self.blocks.get(.{ + const last_atom = self.atoms.get(.{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, }) orelse return; @@ -4538,25 +4504,25 @@ fn writeDices(self: *MachO) !void { var buf = std.ArrayList(u8).init(self.base.allocator); defer buf.deinit(); - var block: *TextBlock = self.blocks.get(.{ + var atom: *Atom = self.atoms.get(.{ .seg = self.text_segment_cmd_index orelse return, .sect = self.text_section_index orelse return, }) orelse return; - while (block.prev) |prev| { - block = prev; + while (atom.prev) |prev| { + atom = prev; } const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_sect = text_seg.sections.items[self.text_section_index.?]; while (true) { - if (block.dices.items.len > 0) { - const sym = self.locals.items[block.local_sym_index]; + if (atom.dices.items.len > 0) { + const sym = self.locals.items[atom.local_sym_index]; const base_off = try math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset); - try buf.ensureUnusedCapacity(block.dices.items.len * @sizeOf(macho.data_in_code_entry)); - for (block.dices.items) |dice| { + try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry)); + for (atom.dices.items) |dice| { const rebased_dice = macho.data_in_code_entry{ .offset = base_off + dice.offset, .length = dice.length, @@ -4566,8 +4532,8 @@ fn writeDices(self: *MachO) !void { } } - if (block.next) |next| { - block = next; + if (atom.next) |next| { + atom = next; } else break; } From 054fe96bcd84cc4f0536696636531b192224df48 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 11 Sep 2021 12:25:00 +0200 Subject: [PATCH 72/78] macho: enable tracy in more places within the linker --- src/link/MachO.zig | 9 +++++++++ src/link/MachO/Atom.zig | 7 +++++++ src/link/MachO/Object.zig | 7 +++++++ 3 files changed, 23 insertions(+) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4436391979..0115a9d2b3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2579,6 +2579,9 @@ fn resolveDyldStubBinder(self: *MachO) !void { } fn parseObjectsIntoAtoms(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + var parsed_atoms = Object.ParsedAtoms.init(self.base.allocator); defer parsed_atoms.deinit(); @@ -3933,6 +3936,9 @@ fn findFreeSpace(self: MachO, segment_id: u16, alignment: u64, start: ?u64) u64 } fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { + const tracy = trace(@src()); + defer tracy.end(); + const seg = &self.load_commands.items[match.seg].Segment; const sect = &seg.sections.items[match.sect]; @@ -4081,6 +4087,9 @@ fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u3 } fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { + const tracy = trace(@src()); + defer tracy.end(); + const seg = &self.load_commands.items[match.seg].Segment; const sect = &seg.sections.items[match.sect]; var free_list = self.atom_free_lists.get(match).?; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 41e34bc6f7..7566670488 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -10,6 +10,7 @@ const macho = std.macho; const math = std.math; const mem = std.mem; const meta = std.meta; +const trace = 
@import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; @@ -701,6 +702,9 @@ fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Reloc } pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocContext) !void { + const tracy = trace(@src()); + defer tracy.end(); + const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size); var it = RelocIterator{ .buffer = filtered_relocs, @@ -1141,6 +1145,9 @@ fn parseLoad(self: Atom, rel: macho.relocation_info, out: *Relocation) void { } pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + for (self.relocs.items) |rel| { log.debug("relocating {}", .{rel}); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 0f68890e74..aae3a40bd1 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -14,6 +14,7 @@ const sort = std.sort; const commands = @import("commands.zig"); const segmentName = commands.segmentName; const sectionName = commands.sectionName; +const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); @@ -344,6 +345,9 @@ const AtomParser = struct { pub fn next(self: *AtomParser, context: Context) !?*Atom { if (self.index == self.nlists.len) return null; + const tracy = trace(@src()); + defer tracy.end(); + var aliases = std.ArrayList(NlistWithIndex).init(context.allocator); defer aliases.deinit(); @@ -465,6 +469,9 @@ pub fn parseIntoAtoms( object_id: u16, macho_file: *MachO, ) !ParsedAtoms { + const tracy = trace(@src()); + defer tracy.end(); + var parsed_atoms = ParsedAtoms.init(allocator); const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; From 7aa6064638322286b5aed940c17da1c8f8fad81a Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 11 Sep 2021 14:18:09 +0200 Subject: [PATCH 73/78] macho: insert rpaths upon parsing Also, insert empty data-in-code lc in populateMissingMetadata fn. 
--- src/link/MachO.zig | 66 +++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 0115a9d2b3..3eb2ab1d7d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -688,7 +688,22 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { var rpath_table = std.StringArrayHashMap(void).init(arena); for (self.base.options.rpath_list) |rpath| { if (rpath_table.contains(rpath)) continue; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath.len + 1, + @sizeOf(u64), + )); + var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{ + .cmd = macho.LC_RPATH, + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); + mem.set(u8, rpath_cmd.data, 0); + mem.copy(u8, rpath_cmd.data, rpath); + try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); try rpath_table.putNoClobber(rpath, {}); + self.load_commands_dirty = true; } if (self.base.options.verbose_link) { @@ -763,9 +778,7 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { } try self.resolveSymbols(); - try self.addRpathLCs(rpath_table.keys()); try self.addLoadDylibLCs(); - try self.addDataInCodeLC(); try self.addCodeSignatureLC(); try self.parseObjectsIntoAtoms(); @@ -2720,20 +2733,6 @@ fn parseObjectsIntoAtoms(self: *MachO) !void { } } -fn addDataInCodeLC(self: *MachO) !void { - if (self.data_in_code_cmd_index != null) return; - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; -} - fn addCodeSignatureLC(self: *MachO) !void { if (self.code_signature_cmd_index != null or !self.requires_adhoc_codesig) return; self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -2748,26 +2747,6 @@ fn addCodeSignatureLC(self: *MachO) !void { self.load_commands_dirty = true; } -fn addRpathLCs(self: *MachO, rpaths: []const []const u8) !void { - for (rpaths) |rpath| { - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{ - .cmd = macho.LC_RPATH, - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd }); - self.load_commands_dirty = true; - } -} - fn addLoadDylibLCs(self: *MachO) !void { for (self.referenced_dylibs.keys()) |id| { const dylib = self.dylibs.items[id]; @@ -3270,6 +3249,8 @@ pub fn updateDeclExports( decl: *Module.Decl, exports: []const *Module.Export, ) !void { + // TODO If we are exporting with global linkage, check for already defined globals and flag + // symbol duplicate/collision! 
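The new TODO above is not implemented by this patch. Purely as an illustration of what such a duplicate-export check could look like, with toy stand-ins for the linker's symbol_resolver (the names and types here are hypothetical, not the file's actual API):

    const std = @import("std");

    const Where = enum { undef, global };
    const Resolv = struct { where: Where };

    /// Hypothetical pre-flight check: exporting a global whose name already
    /// resolves to another global definition is a collision.
    fn checkDuplicateGlobal(resolver: *std.AutoHashMap(u32, Resolv), n_strx: u32) !void {
        const resolv = resolver.get(n_strx) orelse return; // first definition: fine
        if (resolv.where == .global) return error.MultipleSymbolDefinitions;
    }

    pub fn main() !void {
        var gpa = std.heap.GeneralPurposeAllocator(.{}){};
        var resolver = std.AutoHashMap(u32, Resolv).init(&gpa.allocator);
        defer resolver.deinit();

        try resolver.put(7, .{ .where = .global });
        try checkDuplicateGlobal(&resolver, 13); // unseen name: ok
        checkDuplicateGlobal(&resolver, 7) catch |err| {
            std.debug.assert(err == error.MultipleSymbolDefinitions);
            return;
        };
        unreachable;
    }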
if (build_options.skip_non_native and builtin.object_format != .macho) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -3866,6 +3847,19 @@ pub fn populateMissingMetadata(self: *MachO) !void { try self.load_commands.append(self.base.allocator, .{ .Uuid = uuid_cmd }); self.load_commands_dirty = true; } + + if (self.data_in_code_cmd_index == null) { + self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + self.load_commands_dirty = true; + } } const AllocateSectionOpts = struct { From 1965465ced82dc9c0fb93ea9182f196e6d6f4409 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 13 Sep 2021 13:19:08 +0200 Subject: [PATCH 74/78] macho: split resolveSymbols into standalone functions --- src/link/MachO.zig | 362 +++++++++++++++++++++++---------------------- 1 file changed, 185 insertions(+), 177 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 3eb2ab1d7d..6362e6b9aa 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -147,13 +147,14 @@ unresolved: std.AutoArrayHashMapUnmanaged(u32, enum { stub, got, }) = .{}, +tentatives: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, globals_free_list: std.ArrayListUnmanaged(u32) = .{}, -dyld_private_sym_index: ?u32 = null, dyld_stub_binder_index: ?u32 = null, -stub_preamble_sym_index: ?u32 = null, +dyld_private_atom: ?*Atom = null, +stub_helper_preamble_atom: ?*Atom = null, strtab: std.ArrayListUnmanaged(u8) = .{}, strtab_dir: std.HashMapUnmanaged(u32, u32, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, @@ -777,10 +778,31 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { sect.offset = self.tlv_bss_file_offset; } - try self.resolveSymbols(); - try self.addLoadDylibLCs(); + for (self.objects.items) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(u16, object_id)); + } + + try self.resolveSymbolsInArchives(); + try self.resolveDyldStubBinder(); + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); + try self.resolveSymbolsInDylibs(); + try self.createDsoHandleAtom(); try self.addCodeSignatureLC(); + for (self.unresolved.keys()) |index| { + const sym = self.undefs.items[index]; + const sym_name = self.getString(sym.n_strx); + const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); + } + if (self.unresolved.count() > 0) { + return error.UndefinedSymbolReference; + } + + try self.createTentativeDefAtoms(); try self.parseObjectsIntoAtoms(); try self.allocateGlobalSymbols(); try self.writeAtoms(); @@ -1055,6 +1077,7 @@ pub fn parseDylib(self: *MachO, path: []const u8, opts: DylibCreateOpts) ParseDy try self.dylibs_map.putNoClobber(self.base.allocator, dylib.id.?.name, dylib_id); if (!(opts.is_dependent or self.referenced_dylibs.contains(dylib_id))) { + try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -1789,20 +1812,31 @@ pub fn createGotAtom(self: *MachO, key: GotIndirectionKey) !*Atom { return atom; } -fn createDyldPrivateAtom(self: *MachO) !*Atom { +fn createDyldPrivateAtom(self: *MachO) !void { + if 
(self.dyld_private_atom != null) return; const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ + const sym = try self.locals.addOne(self.base.allocator); + sym.* = .{ .n_strx = try self.makeString("l_zld_dyld_private"), .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, - }); - self.dyld_private_sym_index = local_sym_index; - return self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + }; + const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + self.dyld_private_atom = atom; + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.data_section_index.?, + }; + const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } -fn createStubHelperPreambleAtom(self: *MachO) !*Atom { +fn createStubHelperPreambleAtom(self: *MachO) !void { + if (self.stub_helper_preamble_atom != null) return; const arch = self.base.options.target.cpu.arch; const size: u64 = switch (arch) { .x86_64 => 15, @@ -1815,14 +1849,16 @@ fn createStubHelperPreambleAtom(self: *MachO) !*Atom { else => unreachable, }; const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ + const sym = try self.locals.addOne(self.base.allocator); + sym.* = .{ .n_strx = try self.makeString("l_zld_stub_preamble"), .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, - }); + }; const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + const dyld_private_sym_index = self.dyld_private_atom.?.local_sym_index; switch (arch) { .x86_64 => { try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); @@ -1833,7 +1869,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !*Atom { atom.relocs.appendAssumeCapacity(.{ .offset = 3, .where = .local, - .where_index = self.dyld_private_sym_index.?, + .where_index = dyld_private_sym_index, .payload = .{ .signed = .{ .addend = 0, @@ -1866,7 +1902,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !*Atom { atom.relocs.appendAssumeCapacity(.{ .offset = 0, .where = .local, - .where_index = self.dyld_private_sym_index.?, + .where_index = dyld_private_sym_index, .payload = .{ .page = .{ .kind = .page, @@ -1879,7 +1915,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !*Atom { atom.relocs.appendAssumeCapacity(.{ .offset = 4, .where = .local, - .where_index = self.dyld_private_sym_index.?, + .where_index = dyld_private_sym_index, .payload = .{ .page_off = .{ .kind = .page, @@ -1931,8 +1967,16 @@ fn createStubHelperPreambleAtom(self: *MachO) !*Atom { }, else => unreachable, } - self.stub_preamble_sym_index = local_sym_index; - return atom; + self.stub_helper_preamble_atom = atom; + const match = MatchingSection{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }; + const alignment_pow_2 = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, atom.size, alignment_pow_2, match); + sym.n_value = vaddr; + sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); } pub fn createStubHelperAtom(self: *MachO) !*Atom { @@ -1968,7 +2012,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { atom.relocs.appendAssumeCapacity(.{ .offset = 6, .where = .local, - .where_index = self.stub_preamble_sym_index.?, + .where_index = self.stub_helper_preamble_atom.?.local_sym_index, .payload = .{ .branch = .{ .arch = arch }, }, @@ -1988,7 +2032,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { atom.relocs.appendAssumeCapacity(.{ .offset = 4, .where = .local, - .where_index = self.stub_preamble_sym_index.?, + .where_index = self.stub_helper_preamble_atom.?.local_sym_index, .payload = .{ .branch = .{ .arch = arch }, }, @@ -2108,11 +2152,99 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { return atom; } -fn resolveSymbolsInObject( - self: *MachO, - object_id: u16, - tentatives: *std.AutoArrayHashMap(u32, void), -) !void { +fn createTentativeDefAtoms(self: *MachO) !void { + if (self.tentatives.count() == 0) return; + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative defintion. + while (self.tentatives.popOrNull()) |entry| { + const match = MatchingSection{ + .seg = self.data_segment_cmd_index.?, + .sect = self.bss_section_index.?, + }; + _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + + const global_sym = &self.globals.items[entry.key]; + const size = global_sym.n_value; + const alignment = (global_sym.n_desc >> 8) & 0x0f; + + global_sym.n_value = 0; + global_sym.n_desc = 0; + global_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + + const local_sym_index = @intCast(u32, self.locals.items.len); + const local_sym = try self.locals.addOne(self.base.allocator); + local_sym.* = .{ + .n_strx = global_sym.n_strx, + .n_type = macho.N_SECT, + .n_sect = global_sym.n_sect, + .n_desc = 0, + .n_value = 0, + }; + + const resolv = self.symbol_resolver.getPtr(local_sym.n_strx) orelse unreachable; + resolv.local_sym_index = local_sym_index; + + const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + const alignment_pow_2 = try math.powi(u32, 2, alignment); + const vaddr = try self.allocateAtom(atom, size, alignment_pow_2, match); + local_sym.n_value = vaddr; + global_sym.n_value = vaddr; + } +} + +fn createDsoHandleAtom(self: *MachO) !void { + if (self.strtab_dir.getAdapted(@as([]const u8, "___dso_handle"), StringSliceAdapter{ + .strtab = &self.strtab, + })) |n_strx| blk: { + const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk; + if (resolv.where != .undef) break :blk; + + const undef = &self.undefs.items[resolv.where_index]; + const match: MatchingSection = .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, + }; + const local_sym_index = @intCast(u32, self.locals.items.len); + var nlist = macho.nlist_64{ + .n_strx = undef.n_strx, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1), + .n_desc = 0, + .n_value = 0, + }; + try self.locals.append(self.base.allocator, nlist); + const global_sym_index = @intCast(u32, self.globals.items.len); + nlist.n_type |= macho.N_EXT; + nlist.n_desc = macho.N_WEAK_DEF; + try self.globals.append(self.base.allocator, nlist); + + _ = self.unresolved.fetchSwapRemove(resolv.where_index); + + undef.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + resolv.* = .{ + .where = .global, + .where_index = global_sym_index, + .local_sym_index = local_sym_index, + }; + + // We create an empty atom for this symbol. + // TODO perhaps we should special-case special symbols? Create a separate + // linked list of atoms? + const atom = try self.createEmptyAtom(local_sym_index, 0, 0); + const sym = &self.locals.items[local_sym_index]; + const vaddr = try self.allocateAtom(atom, 0, 1, match); + sym.n_value = vaddr; + atom.dirty = false; // We don't really want to write it to file. + } +} + +fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { const object = &self.objects.items[object_id]; log.debug("resolving symbols in '{s}'", .{object.name}); @@ -2184,7 +2316,7 @@ fn resolveSymbolsInObject( const global = &self.globals.items[resolv.where_index]; if (symbolIsTentative(global.*)) { - _ = tentatives.fetchSwapRemove(resolv.where_index); + _ = self.tentatives.fetchSwapRemove(resolv.where_index); } else if (!(symbolIsWeakDef(sym) or symbolIsPext(sym)) and !(symbolIsWeakDef(global.*) or symbolIsPext(global.*))) { @@ -2236,7 +2368,7 @@ fn resolveSymbolsInObject( .where_index = global_sym_index, .file = object_id, }); - _ = try tentatives.getOrPut(global_sym_index); + _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); continue; }; @@ -2260,7 +2392,7 @@ fn resolveSymbolsInObject( .n_desc = sym.n_desc, .n_value = sym.n_value, }); - _ = try tentatives.getOrPut(global_sym_index); + _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); resolv.* = .{ .where = .global, .where_index = global_sym_index, @@ -2298,16 +2430,9 @@ fn resolveSymbolsInObject( } } -fn resolveSymbols(self: *MachO) !void { - var tentatives = std.AutoArrayHashMap(u32, void).init(self.base.allocator); - defer tentatives.deinit(); +fn resolveSymbolsInArchives(self: *MachO) !void { + if (self.archives.items.len == 0) return; - // First pass, resolve symbols in provided objects. - for (self.objects.items) |_, object_id| { - try self.resolveSymbolsInObject(@intCast(u16, object_id), &tentatives); - } - - // Second pass, resolve symbols in static libraries. var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; @@ -2324,74 +2449,19 @@ fn resolveSymbols(self: *MachO) !void { const object_id = @intCast(u16, self.objects.items.len); const object = try self.objects.addOne(self.base.allocator); object.* = try archive.parseObject(self.base.allocator, self.base.options.target, offsets.items[0]); - try self.resolveSymbolsInObject(object_id, &tentatives); + try self.resolveSymbolsInObject(object_id); continue :loop; } next_sym += 1; } +} - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative defintion. 
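resolveSymbolsInArchives, split out above, drives archive extraction from the unresolved worklist: each still-undefined name is looked up in every archive's table of contents, a hit parses the defining member in as a fresh object, and since resolving that object can change the worklist, the loop re-tests the current slot instead of advancing. The same worklist pattern as a standalone sketch with toy types (the real code parses a Mach-O object at the member's archive offset rather than recording an id):

    const std = @import("std");

    /// Toy archive: a table of contents mapping symbol name -> member id.
    const Archive = struct {
        toc: std.StringHashMap(u32),
    };

    /// Walk the unresolved worklist against each archive's TOC. A hit
    /// "extracts" the defining member and removes the name; the loop then
    /// re-tests slot i, because swapRemove moved a new element into it.
    fn resolveInArchives(
        allocator: *std.mem.Allocator,
        unresolved: *std.ArrayList([]const u8),
        archives: []const Archive,
    ) !std.ArrayList(u32) {
        var extracted = std.ArrayList(u32).init(allocator);
        var i: usize = 0;
        loop: while (i < unresolved.items.len) {
            const name = unresolved.items[i];
            for (archives) |archive| {
                const member = archive.toc.get(name) orelse continue;
                try extracted.append(member);
                _ = unresolved.swapRemove(i);
                continue :loop;
            }
            i += 1; // no archive defines it; leave it for the dylib pass
        }
        return extracted;
    }

    pub fn main() !void {
        var gpa = std.heap.GeneralPurposeAllocator(.{}){};
        const allocator = &gpa.allocator;

        var toc = std.StringHashMap(u32).init(allocator);
        defer toc.deinit();
        try toc.put("_malloc", 3);

        var unresolved = std.ArrayList([]const u8).init(allocator);
        defer unresolved.deinit();
        try unresolved.append("_malloc");
        try unresolved.append("_dyld_stub_binder");

        var extracted = try resolveInArchives(allocator, &unresolved, &[_]Archive{.{ .toc = toc }});
        defer extracted.deinit();
        std.debug.assert(extracted.items.len == 1 and unresolved.items.len == 1);
    }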
- while (tentatives.popOrNull()) |entry| { - const sym = &self.globals.items[entry.key]; - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); +fn resolveSymbolsInDylibs(self: *MachO) !void { + if (self.dylibs.items.len == 0) return; - const size = sym.n_value; - const alignment = (sym.n_desc >> 8) & 0x0f; - - sym.n_value = 0; - sym.n_desc = 0; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - var local_sym = sym.*; - local_sym.n_type = macho.N_SECT; - - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, local_sym); - - const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; - resolv.local_sym_index = local_sym_index; - - const atom = try self.createEmptyAtom(local_sym_index, size, alignment); - const alignment_pow_2 = try math.powi(u32, 2, alignment); - const vaddr = try self.allocateAtom(atom, size, alignment_pow_2, match); - sym.n_value = vaddr; - } - - try self.resolveDyldStubBinder(); - { - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; - const atom = try self.createDyldPrivateAtom(); - const sym = &self.locals.items[atom.local_sym_index]; - const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); - sym.n_value = vaddr; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - } - { - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }; - const atom = try self.createStubHelperPreambleAtom(); - const sym = &self.locals.items[atom.local_sym_index]; - const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); - sym.n_value = vaddr; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - } - - // Third pass, resolve symbols in dynamic libraries. - next_sym = 0; + var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; const sym_name = self.getString(sym.n_strx); @@ -2401,6 +2471,7 @@ fn resolveSymbols(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { + try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -2468,69 +2539,6 @@ fn resolveSymbols(self: *MachO) !void { next_sym += 1; } - - // Fourth pass, handle synthetic symbols and flag any undefined references. - if (self.strtab_dir.getAdapted(@as([]const u8, "___dso_handle"), StringSliceAdapter{ - .strtab = &self.strtab, - })) |n_strx| blk: { - const resolv = self.symbol_resolver.getPtr(n_strx) orelse break :blk; - if (resolv.where != .undef) break :blk; - - const undef = &self.undefs.items[resolv.where_index]; - const match: MatchingSection = .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = undef.n_strx, - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1), - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.base.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - nlist.n_desc = macho.N_WEAK_DEF; - try self.globals.append(self.base.allocator, nlist); - - _ = self.unresolved.fetchSwapRemove(resolv.where_index); - - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - }; - - // We create an empty atom for this symbol. - // TODO perhaps we should special-case special symbols? Create a separate - // linked list of atoms? - const atom = try self.createEmptyAtom(local_sym_index, 0, 0); - const sym = &self.locals.items[local_sym_index]; - const vaddr = try self.allocateAtom(atom, 0, 1, match); - sym.n_value = vaddr; - atom.dirty = false; // We don't really want to write it to file. - } - - for (self.unresolved.keys()) |index| { - const sym = self.undefs.items[index]; - const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; - - log.err("undefined reference to symbol '{s}'", .{sym_name}); - log.err(" first referenced in '{s}'", .{self.objects.items[resolv.file].name}); - } - - if (self.unresolved.count() > 0) - return error.UndefinedSymbolReference; } fn resolveDyldStubBinder(self: *MachO) !void { @@ -2557,6 +2565,7 @@ fn resolveDyldStubBinder(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { + try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -2733,6 +2742,21 @@ fn parseObjectsIntoAtoms(self: *MachO) !void { } } +fn addLoadDylibLC(self: *MachO, id: u16) !void { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + var dylib_cmd = try commands.createLoadDylibCommand( + self.base.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + errdefer dylib_cmd.deinit(self.base.allocator); + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.load_commands_dirty = true; +} + fn addCodeSignatureLC(self: *MachO) !void { if (self.code_signature_cmd_index != null or !self.requires_adhoc_codesig) return; self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -2747,23 +2771,6 @@ fn addCodeSignatureLC(self: *MachO) !void { self.load_commands_dirty = true; } -fn addLoadDylibLCs(self: *MachO) !void { - for (self.referenced_dylibs.keys()) |id| { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try commands.createLoadDylibCommand( - self.base.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); - self.load_commands_dirty = true; - } -} - fn setEntryPoint(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; @@ -2807,6 +2814,7 @@ pub fn deinit(self: *MachO) void { self.locals_free_list.deinit(self.base.allocator); self.symbol_resolver.deinit(self.base.allocator); self.unresolved.deinit(self.base.allocator); + self.tentatives.deinit(self.base.allocator); for (self.objects.items) |*object| { object.deinit(self.base.allocator); @@ 
-4417,7 +4425,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, }) orelse return; - if (last_atom.local_sym_index == self.stub_preamble_sym_index.?) return; + if (last_atom == self.stub_helper_preamble_atom.?) return; // Because we insert lazy binding opcodes in reverse order (from last to the first atom), // we need reverse the order of atom traversal here as well. From 4c36da1047a83019ce7af653a32938c9d1ea616d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 13 Sep 2021 17:00:36 +0200 Subject: [PATCH 75/78] macho: fix incremental compilation --- src/link/MachO.zig | 462 +++++++++++++++++++++----------------- src/link/MachO/Atom.zig | 11 + src/link/MachO/Object.zig | 2 + 3 files changed, 267 insertions(+), 208 deletions(-) diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6362e6b9aa..1e32150ae7 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -170,6 +170,8 @@ sections_order_dirty: bool = false, has_dices: bool = false, has_stabs: bool = false, +args_digest: [Cache.hex_digest_len]u8 = undefined, + section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, /// A list of atoms that have surplus capacity. This list can have false @@ -334,31 +336,31 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio return self; } - if (!options.strip and options.module != null) { - // Create dSYM bundle. - const dir = options.module.?.zig_cache_artifact_directory; - log.debug("creating {s}.dSYM bundle in {s}", .{ sub_path, dir.path }); + // if (!options.strip and options.module != null) { + // // Create dSYM bundle. + // const dir = options.module.?.zig_cache_artifact_directory; + // log.debug("creating {s}.dSYM bundle in {s}", .{ sub_path, dir.path }); - const d_sym_path = try fmt.allocPrint( - allocator, - "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", - .{sub_path}, - ); - defer allocator.free(d_sym_path); + // const d_sym_path = try fmt.allocPrint( + // allocator, + // "{s}.dSYM" ++ fs.path.sep_str ++ "Contents" ++ fs.path.sep_str ++ "Resources" ++ fs.path.sep_str ++ "DWARF", + // .{sub_path}, + // ); + // defer allocator.free(d_sym_path); - var d_sym_bundle = try dir.handle.makeOpenPath(d_sym_path, .{}); - defer d_sym_bundle.close(); + // var d_sym_bundle = try dir.handle.makeOpenPath(d_sym_path, .{}); + // defer d_sym_bundle.close(); - const d_sym_file = try d_sym_bundle.createFile(sub_path, .{ - .truncate = false, - .read = true, - }); + // const d_sym_file = try d_sym_bundle.createFile(sub_path, .{ + // .truncate = false, + // .read = true, + // }); - self.d_sym = .{ - .base = self, - .file = d_sym_file, - }; - } + // self.d_sym = .{ + // .base = self, + // .file = d_sym_file, + // }; + // } // Index 0 is always a null symbol. try self.locals.append(allocator, .{ @@ -555,218 +557,256 @@ pub fn flush(self: *MachO, comp: *Compilation) !void { try self.strtab.append(self.base.allocator, 0); } - // Positional arguments to the linker such as object files and static archives. 
- var positionals = std.ArrayList([]const u8).init(arena); + const needs_full_relink = blk: { + if (use_stage1) break :blk true; - try positionals.appendSlice(self.base.options.objects); + var hh: Cache.HashHelper = .{}; + hh.addListOfBytes(self.base.options.objects); + for (comp.c_object_table.keys()) |key| { + hh.addBytes(key.status.success.object_path); + } + hh.addOptionalBytes(module_obj_path); + if (comp.compiler_rt_static_lib) |lib| { + hh.addBytes(lib.full_object_path); + } + if (self.base.options.link_libcpp) { + hh.addBytes(comp.libcxxabi_static_lib.?.full_object_path); + hh.addBytes(comp.libcxx_static_lib.?.full_object_path); + } + hh.addListOfBytes(self.base.options.lib_dirs); + hh.addListOfBytes(self.base.options.framework_dirs); + hh.addListOfBytes(self.base.options.frameworks); + hh.addListOfBytes(self.base.options.rpath_list); + hh.addStringSet(self.base.options.system_libs); + hh.addOptionalBytes(self.base.options.sysroot); + const new_digest = hh.final(); + const needs_full_relink = !mem.eql(u8, &new_digest, &self.args_digest); + mem.copy(u8, &self.args_digest, &new_digest); + break :blk needs_full_relink; + }; - for (comp.c_object_table.keys()) |key| { - try positionals.append(key.status.success.object_path); - } + if (needs_full_relink) { + self.objects.clearRetainingCapacity(); + self.archives.clearRetainingCapacity(); + self.dylibs.clearRetainingCapacity(); + self.dylibs_map.clearRetainingCapacity(); + self.referenced_dylibs.clearRetainingCapacity(); - if (module_obj_path) |p| { - try positionals.append(p); - } + // TODO figure out how to clear atoms from objects, etc. - if (comp.compiler_rt_static_lib) |lib| { - try positionals.append(lib.full_object_path); - } + // Positional arguments to the linker such as object files and static archives. + var positionals = std.ArrayList([]const u8).init(arena); - // libc++ dep - if (self.base.options.link_libcpp) { - try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); - try positionals.append(comp.libcxx_static_lib.?.full_object_path); - } + try positionals.appendSlice(self.base.options.objects); - // Shared and static libraries passed via `-l` flag. - var search_lib_names = std.ArrayList([]const u8).init(arena); - - const system_libs = self.base.options.system_libs.keys(); - for (system_libs) |link_lib| { - // By this time, we depend on these libs being dynamically linked libraries and not static libraries - // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which - // case we want to avoid prepending "-l". 
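The needs_full_relink block above fingerprints every link-line input (objects, C objects, the module object, libs, search dirs, frameworks, rpaths, system libs, sysroot) with the compiler's Cache.HashHelper and compares the result against the args_digest saved by the previous flush; only a changed digest forces the inputs to be re-collected and re-parsed. A minimal stand-in for that remember-and-compare pattern, using std.hash.Wyhash in place of HashHelper:

    const std = @import("std");

    /// Order-sensitive fingerprint of a list of link inputs. Hashing each
    /// item's length before its bytes keeps {"ab","c"} distinct from {"a","bc"}.
    fn digestInputs(items: []const []const u8) u64 {
        var hasher = std.hash.Wyhash.init(0);
        for (items) |item| {
            hasher.update(std.mem.asBytes(&item.len));
            hasher.update(item);
        }
        return hasher.final();
    }

    pub fn main() void {
        const saved = digestInputs(&[_][]const u8{ "main.o", "libfoo.a" });
        // Next flush: identical inputs, so the expensive re-parse is skipped.
        const needs_full_relink = digestInputs(&[_][]const u8{ "main.o", "libfoo.a" }) != saved;
        std.debug.assert(!needs_full_relink);
    }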
- if (Compilation.classifyFileExt(link_lib) == .shared_library) { - try positionals.append(link_lib); - continue; + for (comp.c_object_table.keys()) |key| { + try positionals.append(key.status.success.object_path); } - try search_lib_names.append(link_lib); - } - - var lib_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.lib_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try lib_dirs.append(search_dir); - } else { - log.warn("directory not found for '-L{s}'", .{dir}); + if (module_obj_path) |p| { + try positionals.append(p); } - } - var libs = std.ArrayList([]const u8).init(arena); - var lib_not_found = false; - for (search_lib_names.items) |lib_name| { - // Assume ld64 default: -search_paths_first - // Look in each directory for a dylib (stub first), and then for archive - // TODO implement alternative: -search_dylibs_first - for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { - if (try resolveLib(arena, lib_dirs.items, lib_name, ext)) |full_path| { - try libs.append(full_path); - break; + if (comp.compiler_rt_static_lib) |lib| { + try positionals.append(lib.full_object_path); + } + + // libc++ dep + if (self.base.options.link_libcpp) { + try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); + try positionals.append(comp.libcxx_static_lib.?.full_object_path); + } + + // Shared and static libraries passed via `-l` flag. + var search_lib_names = std.ArrayList([]const u8).init(arena); + + const system_libs = self.base.options.system_libs.keys(); + for (system_libs) |link_lib| { + // By this time, we depend on these libs being dynamically linked libraries and not static libraries + // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which + // case we want to avoid prepending "-l". + if (Compilation.classifyFileExt(link_lib) == .shared_library) { + try positionals.append(link_lib); + continue; } - } else { - log.warn("library not found for '-l{s}'", .{lib_name}); - lib_not_found = true; - } - } - if (lib_not_found) { - log.warn("Library search paths:", .{}); - for (lib_dirs.items) |dir| { - log.warn(" {s}", .{dir}); + try search_lib_names.append(link_lib); } - } - // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. - var libsystem_available = false; - if (self.base.options.sysroot != null) blk: { - // Try stub file first. If we hit it, then we're done as the stub file - // re-exports every single symbol definition. - if (try resolveLib(arena, lib_dirs.items, "System", ".tbd")) |full_path| { - try libs.append(full_path); - libsystem_available = true; - break :blk; + var lib_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.lib_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try lib_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); + } } - // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib - // doesn't export libc.dylib which we'll need to resolve subsequently also. 
- if (try resolveLib(arena, lib_dirs.items, "System", ".dylib")) |libsystem_path| { - if (try resolveLib(arena, lib_dirs.items, "c", ".dylib")) |libc_path| { - try libs.append(libsystem_path); - try libs.append(libc_path); + + var libs = std.ArrayList([]const u8).init(arena); + var lib_not_found = false; + for (search_lib_names.items) |lib_name| { + // Assume ld64 default: -search_paths_first + // Look in each directory for a dylib (stub first), and then for archive + // TODO implement alternative: -search_dylibs_first + for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { + if (try resolveLib(arena, lib_dirs.items, lib_name, ext)) |full_path| { + try libs.append(full_path); + break; + } + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + } + + if (lib_not_found) { + log.warn("Library search paths:", .{}); + for (lib_dirs.items) |dir| { + log.warn(" {s}", .{dir}); + } + } + + // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. + var libsystem_available = false; + if (self.base.options.sysroot != null) blk: { + // Try stub file first. If we hit it, then we're done as the stub file + // re-exports every single symbol definition. + if (try resolveLib(arena, lib_dirs.items, "System", ".tbd")) |full_path| { + try libs.append(full_path); libsystem_available = true; break :blk; } - } - } - if (!libsystem_available) { - const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ - "libc", "darwin", "libSystem.B.tbd", - }); - try libs.append(full_path); - } - - // frameworks - var framework_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.framework_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try framework_dirs.append(search_dir); - } else { - log.warn("directory not found for '-F{s}'", .{dir}); - } - } - - var framework_not_found = false; - for (self.base.options.frameworks) |framework| { - for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { - if (try resolveFramework(arena, framework_dirs.items, framework, ext)) |full_path| { - try libs.append(full_path); - break; + // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib + // doesn't export libc.dylib which we'll need to resolve subsequently also. 
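The lookup loop above mirrors the ld64 default noted in its comment, -search_paths_first: for each library name, every directory is probed for a .tbd text stub first, then a .dylib, then a static .a archive, and the first hit wins. The priority walk in isolation, with a fake on-disk probe standing in for the real path checks:

    const std = @import("std");

    /// ld64's default (-search_paths_first): take the first of .tbd (text
    /// stub), .dylib, then .a that the probe reports present for this name.
    fn pickLibExt(name: []const u8, exists: fn ([]const u8, []const u8) bool) ?[]const u8 {
        for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| {
            if (exists(name, ext)) return ext;
        }
        return null;
    }

    fn fakeProbe(name: []const u8, ext: []const u8) bool {
        // Pretend only a static archive exists for "foo".
        return std.mem.eql(u8, name, "foo") and std.mem.eql(u8, ext, ".a");
    }

    pub fn main() void {
        std.debug.assert(std.mem.eql(u8, pickLibExt("foo", fakeProbe).?, ".a"));
    }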
+ if (try resolveLib(arena, lib_dirs.items, "System", ".dylib")) |libsystem_path| {
+ if (try resolveLib(arena, lib_dirs.items, "c", ".dylib")) |libc_path| {
+ try libs.append(libsystem_path);
+ try libs.append(libc_path);
+ libsystem_available = true;
+ break :blk;
+ }
+ }
- } else {
- log.warn("framework not found for '-framework {s}'", .{framework});
- framework_not_found = true;
 }
- }
-
- if (framework_not_found) {
- log.warn("Framework search paths:", .{});
- for (framework_dirs.items) |dir| {
- log.warn(" {s}", .{dir});
- }
- }
-
- // rpaths
- var rpath_table = std.StringArrayHashMap(void).init(arena);
- for (self.base.options.rpath_list) |rpath| {
- if (rpath_table.contains(rpath)) continue;
- const cmdsize = @intCast(u32, mem.alignForwardGeneric(
- u64,
- @sizeOf(macho.rpath_command) + rpath.len + 1,
- @sizeOf(u64),
- ));
- var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{
- .cmd = macho.LC_RPATH,
- .cmdsize = cmdsize,
- .path = @sizeOf(macho.rpath_command),
- });
- rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path);
- mem.set(u8, rpath_cmd.data, 0);
- mem.copy(u8, rpath_cmd.data, rpath);
- try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd });
- try rpath_table.putNoClobber(rpath, {});
- self.load_commands_dirty = true;
- }
-
- if (self.base.options.verbose_link) {
- var argv = std.ArrayList([]const u8).init(arena);
-
- try argv.append("zig");
- try argv.append("ld");
-
- if (is_exe_or_dyn_lib) {
- try argv.append("-dynamic");
- }
-
- if (is_dyn_lib) {
- try argv.append("-dylib");
-
- const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{
- self.base.options.emit.?.sub_path,
+ if (!libsystem_available) {
+ const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{
+ "libc", "darwin", "libSystem.B.tbd",
 });
- try argv.append("-install_name");
- try argv.append(install_name);
+ try libs.append(full_path);
 }
- if (self.base.options.sysroot) |syslibroot| {
- try argv.append("-syslibroot");
- try argv.append(syslibroot);
- }
-
- for (rpath_table.keys()) |rpath| {
- try argv.append("-rpath");
- try argv.append(rpath);
- }
-
- try argv.appendSlice(positionals.items);
-
- try argv.append("-o");
- try argv.append(full_out_path);
-
- try argv.append("-lSystem");
- try argv.append("-lc");
-
- for (search_lib_names.items) |l_name| {
- try argv.append(try std.fmt.allocPrint(arena, "-l{s}", .{l_name}));
- }
-
- for (self.base.options.lib_dirs) |lib_dir| {
- try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir}));
+ // frameworks
+ var framework_dirs = std.ArrayList([]const u8).init(arena);
+ for (self.base.options.framework_dirs) |dir| {
+ if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| {
+ try framework_dirs.append(search_dir);
+ } else {
+ log.warn("directory not found for '-F{s}'", .{dir});
+ }
 }
+ var framework_not_found = false;
 for (self.base.options.frameworks) |framework| {
- try argv.append(try std.fmt.allocPrint(arena, "-framework {s}", .{framework}));
+ for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| {
+ if (try resolveFramework(arena, framework_dirs.items, framework, ext)) |full_path| {
+ try libs.append(full_path);
+ break;
+ }
+ } else {
+ log.warn("framework not found for '-framework {s}'", .{framework});
+ framework_not_found = true;
+ }
 }
- for (self.base.options.framework_dirs) |framework_dir| {
- try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir}));
+ if (framework_not_found) {
+ log.warn("Framework search paths:", .{});
+ for (framework_dirs.items) |dir| {
+ log.warn(" {s}", .{dir});
+ }
 }
- Compilation.dump_argv(argv.items);
+ // rpaths
+ var rpath_table = std.StringArrayHashMap(void).init(arena);
+ for (self.base.options.rpath_list) |rpath| {
+ if (rpath_table.contains(rpath)) continue;
+ const cmdsize = @intCast(u32, mem.alignForwardGeneric(
+ u64,
+ @sizeOf(macho.rpath_command) + rpath.len + 1,
+ @sizeOf(u64),
+ ));
+ var rpath_cmd = commands.emptyGenericCommandWithData(macho.rpath_command{
+ .cmd = macho.LC_RPATH,
+ .cmdsize = cmdsize,
+ .path = @sizeOf(macho.rpath_command),
+ });
+ rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - rpath_cmd.inner.path);
+ mem.set(u8, rpath_cmd.data, 0);
+ mem.copy(u8, rpath_cmd.data, rpath);
+ try self.load_commands.append(self.base.allocator, .{ .Rpath = rpath_cmd });
+ try rpath_table.putNoClobber(rpath, {});
+ self.load_commands_dirty = true;
+ }
+
+ if (self.base.options.verbose_link) {
+ var argv = std.ArrayList([]const u8).init(arena);
+
+ try argv.append("zig");
+ try argv.append("ld");
+
+ if (is_exe_or_dyn_lib) {
+ try argv.append("-dynamic");
+ }
+
+ if (is_dyn_lib) {
+ try argv.append("-dylib");
+
+ const install_name = try std.fmt.allocPrint(arena, "@rpath/{s}", .{
+ self.base.options.emit.?.sub_path,
+ });
+ try argv.append("-install_name");
+ try argv.append(install_name);
+ }
+
+ if (self.base.options.sysroot) |syslibroot| {
+ try argv.append("-syslibroot");
+ try argv.append(syslibroot);
+ }
+
+ for (rpath_table.keys()) |rpath| {
+ try argv.append("-rpath");
+ try argv.append(rpath);
+ }
+
+ try argv.appendSlice(positionals.items);
+
+ try argv.append("-o");
+ try argv.append(full_out_path);
+
+ try argv.append("-lSystem");
+ try argv.append("-lc");
+
+ for (search_lib_names.items) |l_name| {
+ try argv.append(try std.fmt.allocPrint(arena, "-l{s}", .{l_name}));
+ }
+
+ for (self.base.options.lib_dirs) |lib_dir| {
+ try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir}));
+ }
+
+ for (self.base.options.frameworks) |framework| {
+ try argv.append(try std.fmt.allocPrint(arena, "-framework {s}", .{framework}));
+ }
+
+ for (self.base.options.framework_dirs) |framework_dir| {
+ try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir}));
+ }
+
+ Compilation.dump_argv(argv.items);
+ }
+
+ try self.parseInputFiles(positionals.items, self.base.options.sysroot);
+ try self.parseLibs(libs.items, self.base.options.sysroot);
 }
- try self.parseInputFiles(positionals.items, self.base.options.sysroot);
- try self.parseLibs(libs.items, self.base.options.sysroot);
-
 if (self.bss_section_index) |idx| {
 const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
 const sect = &seg.sections.items[idx];
@@ -778,7 +818,8 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 sect.offset = self.tlv_bss_file_offset;
 }
- for (self.objects.items) |_, object_id| {
+ for (self.objects.items) |*object, object_id| {
+ if (object.analyzed) continue;
 try self.resolveSymbolsInObject(@intCast(u16, object_id));
 }
@@ -2617,6 +2658,8 @@ fn parseObjectsIntoAtoms(self: *MachO) !void {
 defer section_metadata.deinit();
 for (self.objects.items) |*object, object_id| {
+ if (object.analyzed) continue;
+
 var atoms_in_objects = try object.parseIntoAtoms(self.base.allocator, @intCast(u16, object_id), self);
 defer atoms_in_objects.deinit();
@@ -2663,6 +2706,8 @@ fn parseObjectsIntoAtoms(self: *MachO) !void {
 try first_atoms.putNoClobber(match, atom);
 }
 }
+
+ object.analyzed = true;
 }
 var it = section_metadata.iterator();
@@ -3003,6 +3048,12 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv
 defer tracy.end();
 const decl = func.owner_decl;
+ // TODO clearing the code and relocs buffer should probably be orchestrated
+ // in a different, smarter, more automatic way somewhere else, in a more centralised
+ // way than this.
+ // If we don't clear the buffers here, we are up for some nasty surprises when
+ // this atom is reused later on and was not freed by freeAtom().
+ decl.link.macho.clearRetainingCapacity();
 var code_buffer = std.ArrayList(u8).init(self.base.allocator);
 defer code_buffer.deinit();
@@ -3038,12 +3089,6 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv
 try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .none);
 switch (res) {
 .appended => {
- // TODO clearing the code and relocs buffer should probably be orchestrated
- // in a different, smarter, more automatic way somewhere else, in a more centralised
- // way than this.
- // If we don't clear the buffers here, we are up for some nasty surprises when
- // this atom is reused later on and was not freed by freeAtom().
- decl.link.macho.code.clearAndFree(self.base.allocator);
 try decl.link.macho.code.appendSlice(self.base.allocator, code_buffer.items);
 },
 .fail => |em| {
@@ -3194,6 +3239,7 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64
 });
 }
 decl.link.macho.size = code_len;
+ decl.link.macho.dirty = true;
 const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)});
 defer self.base.allocator.free(new_name);
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig
index 7566670488..673ebf5cb0 100644
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -600,6 +600,17 @@ pub fn deinit(self: *Atom, allocator: *Allocator) void {
 self.code.deinit(allocator);
 }
+pub fn clearRetainingCapacity(self: *Atom) void {
+ self.dices.clearRetainingCapacity();
+ self.lazy_bindings.clearRetainingCapacity();
+ self.bindings.clearRetainingCapacity();
+ self.rebases.clearRetainingCapacity();
+ self.relocs.clearRetainingCapacity();
+ self.contained.clearRetainingCapacity();
+ self.aliases.clearRetainingCapacity();
+ self.code.clearRetainingCapacity();
+}
+
 /// Returns how much room there is to grow in virtual address space.
 /// File offset relocation happens transparently, so it is not included in
 /// this calculation.
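The updateFunc hunks above move the buffer reset from the .appended branch to the top of the function, and the Atom.zig hunk introduces clearRetainingCapacity in place of clearAndFree. A minimal standalone sketch of the reuse pattern this enables follows; FakeAtom, its two fields, and the test are illustrative stand-ins, not the linker's actual Atom layout:

    const std = @import("std");

    const FakeAtom = struct {
        code: std.ArrayListUnmanaged(u8) = .{},
        relocs: std.ArrayListUnmanaged(u64) = .{},

        // Empty the buffers but keep their allocations, so regenerating a
        // function into a reused atom does not reallocate on every update.
        fn clearRetainingCapacity(self: *FakeAtom) void {
            self.code.clearRetainingCapacity();
            self.relocs.clearRetainingCapacity();
        }
    };

    test "reused atom starts empty but keeps capacity" {
        const gpa = std.testing.allocator;
        var atom: FakeAtom = .{};
        defer atom.code.deinit(gpa);
        defer atom.relocs.deinit(gpa);

        try atom.code.appendSlice(gpa, &[_]u8{ 0xc3, 0x90 });
        const old_capacity = atom.code.capacity;

        // What updateFunc now does first: without this, a live atom that
        // was never freed by freeAtom() would accumulate stale code/relocs.
        atom.clearRetainingCapacity();

        try std.testing.expectEqual(@as(usize, 0), atom.code.items.len);
        try std.testing.expectEqual(old_capacity, atom.code.capacity);
    }

The design point is that a decl regenerated on every incremental update keeps its buffers warm instead of freeing and reallocating them each time, while still guaranteeing the atom starts from a clean slate.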
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index aae3a40bd1..b558463cea 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -64,6 +64,8 @@ sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
 symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
 reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{},
+analyzed: bool = false,
+
 const DebugInfo = struct {
 inner: dwarf.DwarfInfo,
 debug_info: []u8,

From 46a10401f035b50122af9a91348edeb3f57e864e Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 13 Sep 2021 22:11:20 +0200
Subject: [PATCH 76/78] macho: fix logic for updating exports in incremental codepath

---
 src/link/MachO.zig | 73 ++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 1e32150ae7..d528330e45 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -848,6 +848,22 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 try self.allocateGlobalSymbols();
 try self.writeAtoms();
+ // log.warn("Locals:", .{});
+ // for (self.locals.items) |sym, i| {
+ // log.warn(" => {d}: {s}, {}", .{ i, self.getString(sym.n_strx), sym });
+ // }
+ // log.warn("Globals:", .{});
+ // for (self.globals.items) |sym, i| {
+ // log.warn(" => {d}: {s} {}", .{ i, self.getString(sym.n_strx), sym });
+ // }
+ // {
+ // log.warn("Resolver:", .{});
+ // var it = self.symbol_resolver.iterator();
+ // while (it.next()) |entry| {
+ // log.warn(" => {s}: {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* });
+ // }
+ // }
+
 if (self.bss_section_index) |idx| {
 const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
 const sect = &seg.sections.items[idx];
@@ -1751,6 +1767,7 @@ fn allocateGlobalSymbols(self: *MachO) !void {
 const sym = &self.globals.items[resolv.where_index];
 sym.n_value = local_sym.n_value;
 sym.n_sect = local_sym.n_sect;
+ log.debug("allocating global symbol {s} at 0x{x}", .{ self.getString(sym.n_strx), local_sym.n_value });
 }
 }
@@ -2941,6 +2958,8 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection) void {
 if (atom.prev) |prev| {
 // TODO shrink the section size here
 last_atom.* = prev;
+ } else {
+ _ = self.atoms.fetchRemove(match);
 }
 }
 }
@@ -3360,44 +3379,40 @@ pub fn updateDeclExports(
 },
 }
- if (exp.link.macho.sym_index) |i| {
- const sym = &self.globals.items[i];
- sym.* = .{
- .n_strx = sym.n_strx,
- .n_type = n_type,
- .n_sect = @intCast(u8, self.text_section_index.?) + 1,
- .n_desc = n_desc,
- .n_value = decl_sym.n_value,
- };
- } else {
- const name_str_index = try self.makeString(exp_name);
- const i = if (self.globals_free_list.popOrNull()) |i| i else blk: {
+ const global_sym_index = if (exp.link.macho.sym_index) |i| i else blk: {
+ const i = if (self.globals_free_list.popOrNull()) |i| i else inner: {
 _ = self.globals.addOneAssumeCapacity();
- break :blk @intCast(u32, self.globals.items.len - 1);
- };
- self.globals.items[i] = .{
- .n_strx = name_str_index,
- .n_type = n_type,
- .n_sect = @intCast(u8, self.text_section_index.?) + 1,
- .n_desc = n_desc,
- .n_value = decl_sym.n_value,
- };
- const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, name_str_index);
- resolv.value_ptr.* = .{
- .where = .global,
- .where_index = i,
- .local_sym_index = decl.link.macho.local_sym_index,
+ break :inner @intCast(u32, self.globals.items.len - 1);
 };
+ break :blk i;
+ };
- exp.link.macho.sym_index = @intCast(u32, i);
- }
+ const n_strx = try self.makeString(exp_name);
+ const sym = &self.globals.items[global_sym_index];
+ sym.* = .{
+ .n_strx = n_strx,
+ .n_type = n_type,
+ .n_sect = @intCast(u8, self.text_section_index.?) + 1,
+ .n_desc = n_desc,
+ .n_value = decl_sym.n_value,
+ };
+ exp.link.macho.sym_index = global_sym_index;
+
+ const resolv = try self.symbol_resolver.getOrPut(self.base.allocator, n_strx);
+ resolv.value_ptr.* = .{
+ .where = .global,
+ .where_index = global_sym_index,
+ .local_sym_index = decl.link.macho.local_sym_index,
+ };
 }
 }
 pub fn deleteExport(self: *MachO, exp: Export) void {
 const sym_index = exp.sym_index orelse return;
 self.globals_free_list.append(self.base.allocator, sym_index) catch {};
- self.globals.items[sym_index].n_type = 0;
+ const global = &self.globals.items[sym_index];
+ global.n_type = 0;
+ assert(self.symbol_resolver.remove(global.n_strx));
 }
 pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {

From 760241ce50eaa9031339f6b591358b53f5797486 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 13 Sep 2021 23:02:21 +0200
Subject: [PATCH 77/78] macho: use the cache system to know if we need to relink objects

This applies to stage2, where we use the cache system to work out
whether we need to relink objects when performing incremental updates.
When the process is restarted, however, the idea is in principle to
carry on where we left off by reparsing the prelinked binary from
file; since the required machinery is not there yet, we currently
always fully relink upon restart.
---
 src/link/MachO.zig | 95 ++++++++++++++++++------------------------
 test/stage2/darwin.zig | 22 +++++-----
 2 files changed, 51 insertions(+), 66 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index d528330e45..be5bc230de 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -166,11 +166,13 @@ error_flags: File.ErrorFlags = File.ErrorFlags{},
 load_commands_dirty: bool = false,
 sections_order_dirty: bool = false,
-
 has_dices: bool = false,
 has_stabs: bool = false,
-
-args_digest: [Cache.hex_digest_len]u8 = undefined,
+/// A helper var to indicate if we are at the start of the incremental updates, or
+/// already somewhere further along the update-and-run chain.
+/// TODO once we add opening a prelinked output binary from file, this will become
+/// obsolete as we will carry on where we left off.
+cold_start: bool = false,
 section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{},
@@ -336,6 +338,7 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio
 return self;
 }
+
 // TODO Migrate DebugSymbols to the merged linker codepaths
 // if (!options.strip and options.module != null) {
 // // Create dSYM bundle.
 // const dir = options.module.?.zig_cache_artifact_directory;
@@ -456,8 +459,11 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 defer if (!self.base.options.disable_lld_caching) man.deinit();
 var digest: [Cache.hex_digest_len]u8 = undefined;
+ var needs_full_relink = true;
+
+ cache: {
+ if (use_stage1 and self.base.options.disable_lld_caching) break :cache;
- if (!self.base.options.disable_lld_caching) {
 man = comp.cache_parent.obtain();
 // We are about to obtain this lock, so here we give other processes a chance first.
@@ -491,17 +497,36 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 id_symlink_basename,
 &prev_digest_buf,
 ) catch |err| blk: {
- log.debug("MachO Zld new_digest={s} error: {s}", .{ std.fmt.fmtSliceHexLower(&digest), @errorName(err) });
+ log.debug("MachO Zld new_digest={s} error: {s}", .{
+ std.fmt.fmtSliceHexLower(&digest),
+ @errorName(err),
+ });
 // Handle this as a cache miss.
 break :blk prev_digest_buf[0..0];
 };
 if (mem.eql(u8, prev_digest, &digest)) {
- log.debug("MachO Zld digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)});
 // Hot diggity dog! The output binary is already there.
- self.base.lock = man.toOwnedLock();
- return;
+
+ if (use_stage1) {
+ log.debug("MachO Zld digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)});
+ self.base.lock = man.toOwnedLock();
+ return;
+ } else {
+ log.debug("MachO Zld digest={s} match", .{std.fmt.fmtSliceHexLower(&digest)});
+ if (!self.cold_start) {
+ log.debug(" no need to relink objects", .{});
+ needs_full_relink = false;
+ } else {
+ log.debug(" TODO parse prelinked binary and continue linking where we left off", .{});
+ // TODO until such time however, perform a full relink of objects.
+ needs_full_relink = true;
+ }
+ }
 }
- log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ std.fmt.fmtSliceHexLower(prev_digest), std.fmt.fmtSliceHexLower(&digest) });
+ log.debug("MachO Zld prev_digest={s} new_digest={s}", .{
+ std.fmt.fmtSliceHexLower(prev_digest),
+ std.fmt.fmtSliceHexLower(&digest),
+ });
 // We are about to change the output file to be different, so we invalidate the build hash now.
 directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) {
@@ -509,7 +534,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 else => |e| return e,
 };
 }
-
 const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path});
 if (self.base.options.output_mode == .Obj) {
@@ -557,34 +581,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 try self.strtab.append(self.base.allocator, 0);
 }
- const needs_full_relink = blk: {
- if (use_stage1) break :blk true;
-
- var hh: Cache.HashHelper = .{};
- hh.addListOfBytes(self.base.options.objects);
- for (comp.c_object_table.keys()) |key| {
- hh.addBytes(key.status.success.object_path);
- }
- hh.addOptionalBytes(module_obj_path);
- if (comp.compiler_rt_static_lib) |lib| {
- hh.addBytes(lib.full_object_path);
- }
- if (self.base.options.link_libcpp) {
- hh.addBytes(comp.libcxxabi_static_lib.?.full_object_path);
- hh.addBytes(comp.libcxx_static_lib.?.full_object_path);
- }
- hh.addListOfBytes(self.base.options.lib_dirs);
- hh.addListOfBytes(self.base.options.framework_dirs);
- hh.addListOfBytes(self.base.options.frameworks);
- hh.addListOfBytes(self.base.options.rpath_list);
- hh.addStringSet(self.base.options.system_libs);
- hh.addOptionalBytes(self.base.options.sysroot);
- const new_digest = hh.final();
- const needs_full_relink = !mem.eql(u8, &new_digest, &self.args_digest);
- mem.copy(u8, &self.args_digest, &new_digest);
- break :blk needs_full_relink;
- };
-
 if (needs_full_relink) {
 self.objects.clearRetainingCapacity();
 self.archives.clearRetainingCapacity();
@@ -848,22 +844,6 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 try self.allocateGlobalSymbols();
 try self.writeAtoms();
- // log.warn("Locals:", .{});
- // for (self.locals.items) |sym, i| {
- // log.warn(" => {d}: {s}, {}", .{ i, self.getString(sym.n_strx), sym });
- // }
- // log.warn("Globals:", .{});
- // for (self.globals.items) |sym, i| {
- // log.warn(" => {d}: {s} {}", .{ i, self.getString(sym.n_strx), sym });
- // }
- // {
- // log.warn("Resolver:", .{});
- // var it = self.symbol_resolver.iterator();
- // while (it.next()) |entry| {
- // log.warn(" => {s}: {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* });
- // }
- // }
-
 if (self.bss_section_index) |idx| {
 const seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
 const sect = &seg.sections.items[idx];
@@ -880,7 +860,8 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 try self.flushModule(comp);
 }
- if (!self.base.options.disable_lld_caching) {
+ cache: {
+ if (use_stage1 and self.base.options.disable_lld_caching) break :cache;
 // Update the file with the digest. If it fails we can continue; it only
 // means that the next invocation will have an unnecessary cache miss.
 Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| {
@@ -894,6 +875,8 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
 // other processes clobbering it.
 self.base.lock = man.toOwnedLock();
 }
+
+ self.cold_start = false;
 }
 pub fn flushModule(self: *MachO, comp: *Compilation) !void {
@@ -3929,6 +3912,8 @@ pub fn populateMissingMetadata(self: *MachO) !void {
 });
 self.load_commands_dirty = true;
 }
+
+ self.cold_start = true;
 }
 const AllocateSectionOpts = struct {
diff --git a/test/stage2/darwin.zig b/test/stage2/darwin.zig
index 87b04d1dff..86c2d313a0 100644
--- a/test/stage2/darwin.zig
+++ b/test/stage2/darwin.zig
@@ -27,8 +27,8 @@ pub fn addCases(ctx: *TestContext) !void {
 // Regular old hello world
 case.addCompareOutput(
- \\extern "c" fn write(usize, usize, usize) usize;
- \\extern "c" fn exit(usize) noreturn;
+ \\extern fn write(usize, usize, usize) usize;
+ \\extern fn exit(usize) noreturn;
 \\
 \\pub export fn main() noreturn {
 \\ print();
@@ -47,8 +47,8 @@ pub fn addCases(ctx: *TestContext) !void {
 // Print it 4 times and force growth and realloc.
 case.addCompareOutput(
- \\extern "c" fn write(usize, usize, usize) usize;
- \\extern "c" fn exit(usize) noreturn;
+ \\extern fn write(usize, usize, usize) usize;
+ \\extern fn exit(usize) noreturn;
 \\
 \\pub export fn main() noreturn {
 \\ print();
@@ -74,8 +74,8 @@ pub fn addCases(ctx: *TestContext) !void {
 // Print it once, and change the message.
 case.addCompareOutput(
- \\extern "c" fn write(usize, usize, usize) usize;
- \\extern "c" fn exit(usize) noreturn;
+ \\extern fn write(usize, usize, usize) usize;
+ \\extern fn exit(usize) noreturn;
 \\
 \\pub export fn main() noreturn {
 \\ print();
@@ -94,8 +94,8 @@ pub fn addCases(ctx: *TestContext) !void {
 // Now we print it twice.
 case.addCompareOutput(
- \\extern "c" fn write(usize, usize, usize) usize;
- \\extern "c" fn exit(usize) noreturn;
+ \\extern fn write(usize, usize, usize) usize;
+ \\extern fn exit(usize) noreturn;
 \\
 \\pub export fn main() noreturn {
 \\ print();
@@ -121,7 +121,7 @@ pub fn addCases(ctx: *TestContext) !void {
 // This test case also covers an infrequent scenario where the string table *may* be relocated
 // into the position preceding the symbol table which results in a dyld error.
 case.addCompareOutput(
- \\extern "c" fn exit(usize) noreturn;
+ \\extern fn exit(usize) noreturn;
 \\
 \\pub export fn main() noreturn {
 \\ exit(0);
@@ -131,8 +131,8 @@ pub fn addCases(ctx: *TestContext) !void {
 case.addCompareOutput(
- \\extern "c" fn exit(usize) noreturn;
- \\extern "c" fn write(usize, usize, usize) usize;
+ \\extern fn exit(usize) noreturn;
+ \\extern fn write(usize, usize, usize) usize;
 \\
 \\pub export fn main() noreturn {
 \\ _ = write(1, @ptrToInt("Hey!\n"), 5);

From 05763f43b3d8318c95891650c11ab243ce9a1fd5 Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Tue, 14 Sep 2021 10:28:58 +0200
Subject: [PATCH 78/78] macho: disable splitting sections into atoms in release

We don't actually benefit from it just yet, and getting it right for
release builds and dead-code stripping will require some more thought
put into it.
---
 src/link/MachO/Object.zig | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index a7e312e6c7..27da019be8 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -540,12 +540,15 @@ pub fn parseIntoAtoms(
 // Symbols within this section only.
 const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect);
+ // TODO rewrite and re-enable dead-code stripping optimisation. I think it might make sense
+ // to do this in a standalone pass after we parse the sections as atoms.
// In release mode, if the object file was generated with dead code stripping optimisations, // note it now and parse sections as atoms. - const is_splittable = blk: { - if (macho_file.base.options.optimize_mode == .Debug) break :blk false; - break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - }; + // const is_splittable = blk: { + // if (macho_file.base.options.optimize_mode == .Debug) break :blk false; + // break :blk self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + // }; + const is_splittable = false; macho_file.has_dices = macho_file.has_dices or blk: { if (self.text_section_index) |index| {
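For reference, the gate that the now-commented-out is_splittable block applied can be summarised as follows. This is a minimal sketch assuming std.macho for the header type and the flag constant; canSplitIntoAtoms is an illustrative name, not a function from the patch:

    const std = @import("std");
    const macho = std.macho;

    /// An object may only be split into per-symbol atoms when it was built
    /// with MH_SUBSECTIONS_VIA_SYMBOLS set; the block above additionally
    /// kept one atom per section in Debug builds.
    fn canSplitIntoAtoms(header: macho.mach_header_64, is_debug: bool) bool {
        if (is_debug) return false;
        return header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
    }

With this patch the call site pins is_splittable to false unconditionally, until the dead-code-stripping optimisation mentioned in the TODO is reworked as a standalone pass.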