diff --git a/src/codegen.zig b/src/codegen.zig index ec75cbadc6..7cb7119f0d 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -2523,36 +2523,29 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } else if (func_value.castTag(.extern_fn)) |func_payload| { const decl = func_payload.data; - const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name}); - defer self.bin_file.allocator.free(decl_name); - const already_defined = macho_file.symbol_resolver.contains(decl_name); - const resolv = macho_file.symbol_resolver.get(decl_name) orelse blk: { - break :blk try macho_file.addExternFn(decl_name); - }; - const start = self.code.items.len; - const len: usize = blk: { - switch (arch) { - .x86_64 => { - // callq - try self.code.ensureCapacity(self.code.items.len + 5); - self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 }); - break :blk 5; - }, - .aarch64 => { - // bl - writeInt(u32, try self.code.addManyAsArray(4), 0); - break :blk 4; - }, - else => unreachable, // unsupported architecture on MachO - } - }; - try macho_file.stub_fixups.append(self.bin_file.allocator, .{ - .symbol = resolv.where_index, - .already_defined = already_defined, - .start = start, - .len = len, + const where_index = try macho_file.addExternFn(mem.spanZ(decl.name)); + const offset = @intCast(u32, self.code.items.len); + switch (arch) { + .x86_64 => { + // callq + try self.code.ensureCapacity(self.code.items.len + 5); + self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 }); + }, + .aarch64 => { + // bl + writeInt(u32, try self.code.addManyAsArray(4), Instruction.bl(0).toU32()); + }, + else => unreachable, // unsupported architecture on MachO + } + // Add relocation to the decl. + try decl.link.macho.relocs.append(self.bin_file.allocator, .{ + .offset = offset, + .where = .import, + .where_index = where_index, + .payload = .{ .branch = .{ + .arch = arch, + } }, }); - // We mark the space and fix it up later. } else { return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{}); } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f5e31c2eeb..452b6992bc 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -159,6 +159,14 @@ strtab_needs_relocation: bool = false, has_dices: bool = false, has_stabs: bool = false, +pending_updates: std.ArrayListUnmanaged(struct { + kind: enum { + got, + stub, + }, + index: u32, +}) = .{}, + /// A list of text blocks that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added /// or removed from the freelist. @@ -179,6 +187,7 @@ text_block_free_list: std.ArrayListUnmanaged(*TextBlock) = .{}, /// Pointer to the last allocated text block last_text_block: ?*TextBlock = null, +managed_blocks: std.ArrayListUnmanaged(TextBlock) = .{}, blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, /// A list of all PIE fixups required for this run of the linker. @@ -190,13 +199,6 @@ blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{}, /// backends. pie_fixups: std.ArrayListUnmanaged(PIEFixup) = .{}, -/// A list of all stub (extern decls) fixups required for this run of the linker. -/// Warning, this is currently NOT thread-safe. See the TODO below. -/// TODO Move this list inside `updateDecl` where it should be allocated -/// prior to calling `generateSymbol`, and then immediately deallocated -/// rather than sitting in the global scope. -stub_fixups: std.ArrayListUnmanaged(StubFixup) = .{}, - const SymbolWithLoc = struct { // Table where the symbol can be found. where: enum { @@ -229,19 +231,6 @@ pub const PIEFixup = struct { size: usize, }; -pub const StubFixup = struct { - /// Id of extern (lazy) symbol. - symbol: u32, - /// Signals whether the symbol has already been declared before. If so, - /// then there is no need to rewrite the stub entry and related. - already_defined: bool, - /// Where in the byte stream we should perform the fixup. - start: usize, - /// The length of the byte stream. For x86_64, this will be - /// variable. For aarch64, it will be fixed at 4 bytes. - len: usize, -}; - /// When allocating, the ideal_capacity is calculated by /// actual_capacity + (actual_capacity / ideal_factor) const ideal_factor = 2; @@ -2244,9 +2233,7 @@ fn resolveSymbols(self: *MachO) !void { .local_sym_index = local_sym_index, }; - const block = try self.base.allocator.create(TextBlock); - errdefer self.base.allocator.destroy(block); - + const block = try self.managed_blocks.addOne(self.base.allocator); block.* = TextBlock.empty; block.local_sym_index = local_sym_index; block.code = code; @@ -2382,9 +2369,7 @@ fn resolveSymbols(self: *MachO) !void { // We create an empty atom for this symbol. // TODO perhaps we should special-case special symbols? Create a separate // linked list of atoms? - const block = try self.base.allocator.create(TextBlock); - errdefer self.base.allocator.destroy(block); - + const block = try self.managed_blocks.addOne(self.base.allocator); block.* = TextBlock.empty; block.local_sym_index = local_sym_index; block.code = try self.base.allocator.alloc(u8, 0); @@ -3243,9 +3228,8 @@ pub fn deinit(self: *MachO) void { ds.deinit(self.base.allocator); } + self.pending_updates.deinit(self.base.allocator); self.pie_fixups.deinit(self.base.allocator); - self.stub_fixups.deinit(self.base.allocator); - self.text_block_free_list.deinit(self.base.allocator); self.got_entries.deinit(self.base.allocator); self.got_entries_map.deinit(self.base.allocator); self.got_entries_free_list.deinit(self.base.allocator); @@ -3288,8 +3272,12 @@ pub fn deinit(self: *MachO) void { } self.load_commands.deinit(self.base.allocator); - // TODO dealloc all blocks + for (self.managed_blocks.items) |*block| { + block.deinit(self.base.allocator); + } + self.managed_blocks.deinit(self.base.allocator); self.blocks.deinit(self.base.allocator); + self.text_block_free_list.deinit(self.base.allocator); } pub fn closeFiles(self: MachO) void { @@ -3302,6 +3290,9 @@ pub fn closeFiles(self: MachO) void { } fn freeTextBlock(self: *MachO, text_block: *TextBlock) void { + log.debug("freeTextBlock {*}", .{text_block}); + // text_block.deinit(self.base.allocator); + var already_have_free_list_node = false; { var i: usize = 0; @@ -3467,18 +3458,22 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .val = decl.val, }, &code_buffer, .none); - const code = switch (res) { - .externally_managed => |x| x, - .appended => code_buffer.items, - .fail => |em| { - // Clear any PIE fixups for this decl. - self.pie_fixups.shrinkRetainingCapacity(0); - // Clear any stub fixups for this decl. - self.stub_fixups.shrinkRetainingCapacity(0); - decl.analysis = .codegen_failure; - try module.failed_decls.put(module.gpa, decl, em); - return; - }, + const code = blk: { + switch (res) { + .externally_managed => |x| break :blk x, + .appended => { + decl.link.macho.code = code_buffer.toOwnedSlice(); + log.warn("WAT", .{}); + break :blk decl.link.macho.code; + }, + .fail => |em| { + // Clear any PIE fixups for this decl. + self.pie_fixups.shrinkRetainingCapacity(0); + decl.analysis = .codegen_failure; + try module.failed_decls.put(module.gpa, decl, em); + return; + }, + } }; const required_alignment = decl.ty.abiAlignment(self.base.options.target); @@ -3559,12 +3554,12 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { switch (self.base.options.target.cpu.arch) { .x86_64 => { const displacement = try math.cast(u32, target_addr - this_addr - 4); - mem.writeIntLittle(u32, code_buffer.items[fixup.offset..][0..4], displacement); + mem.writeIntLittle(u32, decl.link.macho.code[fixup.offset..][0..4], displacement); }, .aarch64 => { // TODO optimize instruction based on jump length (use ldr(literal) + nop if possible). { - const inst = code_buffer.items[fixup.offset..][0..4]; + const inst = decl.link.macho.code[fixup.offset..][0..4]; var parsed = mem.bytesAsValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.pc_relative_address, @@ -3576,7 +3571,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { parsed.immlo = @truncate(u2, pages); } { - const inst = code_buffer.items[fixup.offset + 4 ..][0..4]; + const inst = decl.link.macho.code[fixup.offset + 4 ..][0..4]; var parsed = mem.bytesAsValue(meta.TagPayload( aarch64.Instruction, aarch64.Instruction.load_store_register, @@ -3590,39 +3585,24 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { } } - // Resolve stubs (if any) - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const stubs = text_segment.sections.items[self.stubs_section_index.?]; - for (self.stub_fixups.items) |fixup| { - const stubs_index = self.stubs_map.get(fixup.symbol) orelse unreachable; - const stub_addr = stubs.addr + stubs_index * stubs.reserved2; - const text_addr = symbol.n_value + fixup.start; - switch (self.base.options.target.cpu.arch) { - .x86_64 => { - assert(stub_addr >= text_addr + fixup.len); - const displacement = try math.cast(u32, stub_addr - text_addr - fixup.len); - var placeholder = code_buffer.items[fixup.start + fixup.len - @sizeOf(u32) ..][0..@sizeOf(u32)]; - mem.writeIntSliceLittle(u32, placeholder, displacement); - }, - .aarch64 => { - assert(stub_addr >= text_addr); - const displacement = try math.cast(i28, stub_addr - text_addr); - var placeholder = code_buffer.items[fixup.start..][0..fixup.len]; - mem.writeIntSliceLittle(u32, placeholder, aarch64.Instruction.bl(displacement).toU32()); - }, - else => unreachable, // unsupported target architecture - } - if (!fixup.already_defined) { - try self.writeStub(stubs_index); - try self.writeStubInStubHelper(stubs_index); - try self.writeLazySymbolPointer(stubs_index); + // Resolve relocations + try decl.link.macho.resolveRelocs(self); - self.rebase_info_dirty = true; - self.lazy_binding_info_dirty = true; + // Apply pending updates + while (self.pending_updates.popOrNull()) |update| { + switch (update.kind) { + .got => unreachable, + .stub => { + try self.writeStub(update.index); + try self.writeStubInStubHelper(update.index); + try self.writeLazySymbolPointer(update.index); + self.rebase_info_dirty = true; + self.lazy_binding_info_dirty = true; + }, } } - self.stub_fixups.shrinkRetainingCapacity(0); + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_section = text_segment.sections.items[self.text_section_index.?]; const section_offset = symbol.n_value - text_section.addr; const file_offset = text_section.offset + section_offset; @@ -3756,6 +3736,7 @@ pub fn deleteExport(self: *MachO, exp: Export) void { } pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { + log.debug("freeDecl {*}", .{decl}); // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. self.freeTextBlock(&decl.link.macho); if (decl.link.macho.local_sym_index != 0) { @@ -4314,7 +4295,8 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, // should be deleted because the block that it points to has grown to take up // more of the extra capacity. if (!big_block.freeListEligible(self.*)) { - _ = self.text_block_free_list.swapRemove(i); + const bl = self.text_block_free_list.swapRemove(i); + bl.deinit(self.base.allocator); } else { i += 1; } @@ -4386,25 +4368,43 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, return vaddr; } -pub fn addExternFn(self: *MachO, name: []const u8) !SymbolWithLoc { - log.debug("adding new extern function '{s}' with dylib ordinal 1", .{name}); +pub fn addExternFn(self: *MachO, name: []const u8) !u32 { + const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name}); + const already_defined = self.symbol_resolver.contains(sym_name); + + if (already_defined) { + const resolv = self.symbol_resolver.get(sym_name) orelse unreachable; + self.base.allocator.free(sym_name); + return resolv.where_index; + } + + log.debug("adding new extern function '{s}' with dylib ordinal 1", .{sym_name}); const import_sym_index = @intCast(u32, self.imports.items.len); try self.imports.append(self.base.allocator, .{ - .n_strx = try self.makeString(name), + .n_strx = try self.makeString(sym_name), .n_type = macho.N_UNDF | macho.N_EXT, .n_sect = 0, .n_desc = packDylibOrdinal(1), .n_value = 0, }); - const resolv = .{ + try self.symbol_resolver.putNoClobber(self.base.allocator, sym_name, .{ .where = .import, .where_index = import_sym_index, - }; - try self.symbol_resolver.putNoClobber(self.base.allocator, try self.base.allocator.dupe(u8, name), resolv); + }); + const stubs_index = @intCast(u32, self.stubs.items.len); try self.stubs.append(self.base.allocator, import_sym_index); try self.stubs_map.putNoClobber(self.base.allocator, import_sym_index, stubs_index); - return resolv; + + // TODO discuss this. The caller context expects codegen.InnerError{ OutOfMemory, CodegenFail }, + // which obviously doesn't include file writing op errors. So instead of trying to write the stub + // entry right here and now, queue it up and dispose of when updating decl. + try self.pending_updates.append(self.base.allocator, .{ + .kind = .stub, + .index = stubs_index, + }); + + return import_sym_index; } const NextSegmentAddressAndOffset = struct { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index eaab05140e..407918456b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -723,9 +723,7 @@ pub fn parseTextBlocks(self: *Object, macho_file: *MachO) !void { break :blk block_local_sym_index; }; - const block = try self.allocator.create(TextBlock); - errdefer self.allocator.destroy(block); - + const block = try macho_file.managed_blocks.addOne(macho_file.base.allocator); block.* = TextBlock.empty; block.local_sym_index = block_local_sym_index; block.code = try self.allocator.dupe(u8, code); diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig index 0b6ff20d94..d28b9630e6 100644 --- a/src/link/MachO/TextBlock.zig +++ b/src/link/MachO/TextBlock.zig @@ -558,9 +558,9 @@ pub fn deinit(self: *TextBlock, allocator: *Allocator) void { self.bindings.deinit(allocator); self.rebases.deinit(allocator); self.relocs.deinit(allocator); - self.allocator.free(self.code); self.contained.deinit(allocator); self.aliases.deinit(allocator); + allocator.free(self.code); } /// Returns how much room there is to grow in virtual address space.