From 2831d6e9b8b29c21bc7417c5e370674e3130f6ae Mon Sep 17 00:00:00 2001
From: Jakub Konka
Date: Mon, 30 Aug 2021 15:43:20 +0200
Subject: [PATCH] macho: add first pass at allocating parsed atoms in objects

This commit makes it possible to combine output of the self-hosted
backend with a pre-compiled C object file, e.g.:

```
zig-out/bin/zig build-exe hello.zig add.o
```

where `add.o` is a pre-compiled C object file.
---
 src/link/MachO.zig           | 171 ++++++++++++++++++++++++++++-------
 src/link/MachO/Object.zig    |  24 ++---
 src/link/MachO/TextBlock.zig |  13 +++
 src/link/MachO/commands.zig  |   2 +-
 4 files changed, 164 insertions(+), 46 deletions(-)

diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index effe6d641b..1def32c41a 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -789,6 +789,31 @@ pub fn flush(self: *MachO, comp: *Compilation) !void {
         try self.allocateTextBlocks();
         try self.flushZld();
     } else {
+        try self.parseTextBlocks();
+        try self.allocateGlobalSymbols();
+        {
+            log.debug("locals:", .{});
+            for (self.locals.items) |sym| {
+                log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym });
+            }
+            log.debug("globals:", .{});
+            for (self.globals.items) |sym| {
+                log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym });
+            }
+            log.debug("undefs:", .{});
+            for (self.undefs.items) |sym| {
+                log.debug(" {s}: {}", .{ self.getString(sym.n_strx), sym });
+            }
+            log.debug("unresolved:", .{});
+            for (self.unresolved.keys()) |key| {
+                log.debug(" {d} => {s}", .{ key, self.unresolved.get(key).? });
+            }
+            log.debug("resolved:", .{});
+            var it = self.symbol_resolver.iterator();
+            while (it.next()) |entry| {
+                log.debug(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* });
+            }
+        }
         try self.writeAtoms();
         try self.flushModule(comp);
     }
@@ -1114,12 +1139,14 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
     const segname = commands.segmentName(sect);
     const sectname = commands.sectionName(sect);
 
+    var needs_allocation = false;
     const res: ?MatchingSection = blk: {
         switch (commands.sectionType(sect)) {
             macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => {
                 if (self.text_const_section_index == null) {
                     self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
                     try text_seg.addSection(self.base.allocator, "__const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1136,6 +1163,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try text_seg.addSection(self.base.allocator, "__objc_methname", .{
                         .flags = macho.S_CSTRING_LITERALS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1148,6 +1176,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try text_seg.addSection(self.base.allocator, "__objc_methtype", .{
                         .flags = macho.S_CSTRING_LITERALS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1158,6 +1187,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.objc_classname_section_index == null) {
                     self.objc_classname_section_index = @intCast(u16, text_seg.sections.items.len);
                     try text_seg.addSection(self.base.allocator, "__objc_classname", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1171,6 +1201,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try text_seg.addSection(self.base.allocator, "__cstring", .{
                         .flags = macho.S_CSTRING_LITERALS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1185,6 +1216,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__objc_selrefs", .{
                         .flags = macho.S_LITERAL_POINTERS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1202,6 +1234,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_const_seg.addSection(self.base.allocator, "__mod_init_func", .{
                         .flags = macho.S_MOD_INIT_FUNC_POINTERS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1215,6 +1248,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_const_seg.addSection(self.base.allocator, "__mod_term_func", .{
                         .flags = macho.S_MOD_TERM_FUNC_POINTERS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1228,6 +1262,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__bss", .{
                         .flags = macho.S_ZEROFILL,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1241,6 +1276,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__thread_vars", .{
                         .flags = macho.S_THREAD_LOCAL_VARIABLES,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1254,6 +1290,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__thread_data", .{
                         .flags = macho.S_THREAD_LOCAL_REGULAR,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1267,6 +1304,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try data_seg.addSection(self.base.allocator, "__thread_bss", .{
                         .flags = macho.S_THREAD_LOCAL_ZEROFILL,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1281,6 +1319,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.eh_frame_section_index == null) {
                     self.eh_frame_section_index = @intCast(u16, text_seg.sections.items.len);
                     try text_seg.addSection(self.base.allocator, "__eh_frame", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1293,6 +1332,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.data_const_section_index == null) {
                     self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
                     try data_const_seg.addSection(self.base.allocator, "__const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1307,6 +1347,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                     try text_seg.addSection(self.base.allocator, "__text", .{
                         .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
                     });
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1329,6 +1370,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.ustring_section_index == null) {
                     self.ustring_section_index = @intCast(u16, text_seg.sections.items.len);
                     try text_seg.addSection(self.base.allocator, "__ustring", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1339,6 +1381,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.gcc_except_tab_section_index == null) {
                     self.gcc_except_tab_section_index = @intCast(u16, text_seg.sections.items.len);
                     try text_seg.addSection(self.base.allocator, "__gcc_except_tab", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1349,6 +1392,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.objc_methlist_section_index == null) {
                     self.objc_methlist_section_index = @intCast(u16, text_seg.sections.items.len);
                     try text_seg.addSection(self.base.allocator, "__objc_methlist", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1364,6 +1408,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.data_const_section_index == null) {
                     self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
                     try data_const_seg.addSection(self.base.allocator, "__const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1374,6 +1419,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.text_const_section_index == null) {
                     self.text_const_section_index = @intCast(u16, text_seg.sections.items.len);
                     try text_seg.addSection(self.base.allocator, "__const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1387,6 +1433,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.data_const_section_index == null) {
                     self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
                     try data_const_seg.addSection(self.base.allocator, "__const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1400,6 +1447,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.data_const_section_index == null) {
                     self.data_const_section_index = @intCast(u16, data_const_seg.sections.items.len);
                     try data_const_seg.addSection(self.base.allocator, "__const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1410,6 +1458,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.objc_cfstring_section_index == null) {
                     self.objc_cfstring_section_index = @intCast(u16, data_const_seg.sections.items.len);
                     try data_const_seg.addSection(self.base.allocator, "__cfstring", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1420,6 +1469,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.objc_classlist_section_index == null) {
                     self.objc_classlist_section_index = @intCast(u16, data_const_seg.sections.items.len);
                     try data_const_seg.addSection(self.base.allocator, "__objc_classlist", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1430,6 +1480,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.objc_imageinfo_section_index == null) {
                     self.objc_imageinfo_section_index = @intCast(u16, data_const_seg.sections.items.len);
                     try data_const_seg.addSection(self.base.allocator, "__objc_imageinfo", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1440,6 +1491,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.objc_const_section_index == null) {
                     self.objc_const_section_index = @intCast(u16, data_seg.sections.items.len);
                     try data_seg.addSection(self.base.allocator, "__objc_const", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1450,6 +1502,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.objc_classrefs_section_index == null) {
                     self.objc_classrefs_section_index = @intCast(u16, data_seg.sections.items.len);
                     try data_seg.addSection(self.base.allocator, "__objc_classrefs", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1460,6 +1513,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.objc_data_section_index == null) {
                     self.objc_data_section_index = @intCast(u16, data_seg.sections.items.len);
                     try data_seg.addSection(self.base.allocator, "__objc_data", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1470,6 +1524,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
                 if (self.data_section_index == null) {
                     self.data_section_index = @intCast(u16, data_seg.sections.items.len);
                     try data_seg.addSection(self.base.allocator, "__data", .{});
+                    needs_allocation = true;
                 }
 
                 break :blk .{
@@ -1494,6 +1549,36 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio
     if (res) |match| {
         _ = try self.section_ordinals.getOrPut(self.base.allocator, match);
         _ = try self.block_free_lists.getOrPutValue(self.base.allocator, match, .{});
+
+        const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1;
+        if (!use_stage1) {
+            const target_seg = &self.load_commands.items[match.seg].Segment;
+            const target_sect = &target_seg.sections.items[match.sect];
+
+            // Update section's alignment
+            // TODO if sect.@"align" > target_sect.@"align", should we move the entire
+            // section to match the required alignment?
+            target_sect.@"align" = math.max(target_sect.@"align", sect.@"align");
+
+            if (needs_allocation) {
+                const alignment = try math.powi(u32, 2, target_sect.@"align");
+                const needed_size = sect.size;
+                const off = target_seg.findFreeSpace(needed_size, alignment, self.header_pad);
+                assert(off + needed_size <= target_seg.inner.fileoff + target_seg.inner.filesize); // TODO expand
+
+                log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{
+                    segname,
+                    sectname,
+                    off,
+                    off + needed_size,
+                });
+
+                target_sect.addr = target_seg.inner.vmaddr + off;
+                target_sect.size = needed_size;
+                target_sect.offset = @intCast(u32, off);
+                self.load_commands_dirty = true;
+            }
+        }
     }
 
     return res;
@@ -1759,23 +1844,41 @@ pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment:
 }
 
 pub fn allocateAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !u64 {
-    // TODO converge with `allocateTextBlock`
-    const seg = self.load_commands.items[match.seg].Segment;
-    const sect = seg.sections.items[match.sect];
+    const seg = &self.load_commands.items[match.seg].Segment;
+    const sect = &seg.sections.items[match.sect];
     const sym = &self.locals.items[atom.local_sym_index];
-    const base_addr = if (self.blocks.get(match)) |last| blk: {
+
+    var atom_placement: ?*TextBlock = null;
+
+    // TODO converge with `allocateTextBlock` and handle free list
+    const vaddr = if (self.blocks.get(match)) |last| blk: {
         const last_atom_sym = self.locals.items[last.local_sym_index];
-        break :blk last_atom_sym.n_value + last.size;
+        const ideal_capacity = padToIdeal(last.size);
+        const ideal_capacity_end_vaddr = last_atom_sym.n_value + ideal_capacity;
+        const last_atom_alignment = try math.powi(u32, 2, atom.alignment);
+        const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, last_atom_alignment);
+        atom_placement = last;
+        break :blk new_start_vaddr;
     } else sect.addr;
 
-    const atom_alignment = try math.powi(u32, 2, atom.alignment);
-    const vaddr = mem.alignForwardGeneric(u64, base_addr, atom_alignment);
+    log.debug("allocating atom for symbol {s} at address 0x{x}", .{ self.getString(sym.n_strx), vaddr });
 
-    const expand_section = true;
+    const expand_section = atom_placement == null or atom_placement.?.next == null;
     if (expand_section) {
-        // Expand the section, possibly shifting all the atoms for the sections following it.
-        // It might also be needed to shift entire segments too if there is not enough
-        // padding left.
+        const needed_size = (vaddr + atom.size) - sect.addr;
+        const end_addr = blk: {
+            const next_ordinal = self.section_ordinals.getIndex(match).?; // Ordinals are +1 to begin with.
+            const end_addr = if (self.section_ordinals.keys().len > next_ordinal) inner: {
+                const next_match = self.section_ordinals.keys()[next_ordinal];
+                const next_seg = self.load_commands.items[next_match.seg].Segment;
+                const next_sect = next_seg.sections.items[next_match.sect];
+                break :inner next_sect.addr;
+            } else seg.inner.filesize;
+            break :blk end_addr;
+        };
+        assert(needed_size <= end_addr); // TODO must expand the section
+        sect.size = needed_size;
+        self.load_commands_dirty = true;
     }
     const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1);
     sym.n_value = vaddr;
@@ -1828,6 +1931,21 @@ pub fn writeAtom(self: *MachO, atom: *TextBlock, match: MatchingSection) !void {
     try self.writeLocalSymbol(atom.local_sym_index);
 }
 
+fn allocateGlobalSymbols(self: *MachO) !void {
+    // TODO should we do this in `allocateAtom` (or similar)? Then, we would need to
+    // store the link atom -> globals somewhere.
+    var sym_it = self.symbol_resolver.valueIterator();
+    while (sym_it.next()) |resolv| {
+        if (resolv.where != .global) continue;
+
+        assert(resolv.local_sym_index != 0);
+        const local_sym = self.locals.items[resolv.local_sym_index];
+        const sym = &self.globals.items[resolv.where_index];
+        sym.n_value = local_sym.n_value;
+        sym.n_sect = local_sym.n_sect;
+    }
+}
+
 pub fn allocateAtomStage1(self: *MachO, atom: *TextBlock, match: MatchingSection) !void {
     // Update target section's metadata
     // TODO should we update segment's size here too?
@@ -2313,14 +2431,14 @@ fn resolveSymbolsInObject(
                    continue;
                },
                .undef => {
-                    const undef = &self.undefs.items[resolv.where_index];
-                    undef.* = .{
-                        .n_strx = 0,
-                        .n_type = macho.N_UNDF,
-                        .n_sect = 0,
-                        .n_desc = 0,
-                        .n_value = 0,
-                    };
+                    // const undef = &self.undefs.items[resolv.where_index];
+                    // undef.* = .{
+                    // .n_strx = 0,
+                    // .n_type = macho.N_UNDF,
+                    // .n_sect = 0,
+                    // .n_desc = 0,
+                    // .n_value = 0,
+                    // };
                    _ = self.unresolved.fetchSwapRemove(resolv.where_index);
                },
            }
@@ -2457,18 +2575,9 @@ fn resolveSymbols(self: *MachO) !void {
     // text blocks for each tentative defintion.
     while (tentatives.popOrNull()) |entry| {
         const sym = &self.globals.items[entry.key];
-        const match: MatchingSection = blk: {
-            if (self.bss_section_index == null) {
-                const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-                self.bss_section_index = @intCast(u16, data_seg.sections.items.len);
-                try data_seg.addSection(self.base.allocator, "__bss", .{
-                    .flags = macho.S_ZEROFILL,
-                });
-            }
-            break :blk .{
-                .seg = self.data_segment_cmd_index.?,
-                .sect = self.bss_section_index.?,
-            };
+        const match = MatchingSection{
+            .seg = self.data_segment_cmd_index.?,
+            .sect = self.bss_section_index.?,
         };
 
         _ = try self.section_ordinals.getOrPut(self.base.allocator, match);
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index d90e3837b5..ec9a4901fe 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -504,7 +504,6 @@ pub fn parseTextBlocks(
                 log.debug("unhandled section", .{});
                 continue;
             };
-            // TODO allocate section here.
 
             // Read section's code
             var code = try allocator.alloc(u8, @intCast(usize, sect.size));
@@ -569,12 +568,6 @@ pub fn parseTextBlocks(
             const block_size = block_code.len;
             const block = try macho_file.createEmptyAtom(block_local_sym_index, block_size, sect.@"align");
 
-            if (use_stage1) {
-                try macho_file.allocateAtomStage1(block, match);
-            } else {
-                _ = try macho_file.allocateAtom(block, match);
-            }
-
             mem.copy(u8, block.code.items, block_code);
 
             try block.parseRelocs(relocs, .{
@@ -597,6 +590,11 @@
                }
            }
 
+            if (use_stage1) {
+                try macho_file.allocateAtomStage1(block, match);
+            } else {
+                _ = try macho_file.allocateAtom(block, match);
+            }
            try self.text_blocks.append(allocator, block);
        }
 
@@ -648,7 +646,6 @@ pub fn parseTextBlocks(
            } else {
                _ = try macho_file.allocateAtom(block, match);
            }
-
            try self.text_blocks.append(allocator, block);
        }
 
@@ -679,12 +676,6 @@ pub fn parseTextBlocks(
            };
            const block = try macho_file.createEmptyAtom(block_local_sym_index, sect.size, sect.@"align");
 
-            if (use_stage1) {
-                try macho_file.allocateAtomStage1(block, match);
-            } else {
-                _ = try macho_file.allocateAtom(block, match);
-            }
-
            mem.copy(u8, block.code.items, code);
 
            try block.parseRelocs(relocs, .{
@@ -743,6 +734,11 @@
                });
            }
 
+            if (use_stage1) {
+                try macho_file.allocateAtomStage1(block, match);
+            } else {
+                _ = try macho_file.allocateAtom(block, match);
+            }
            try self.text_blocks.append(allocator, block);
        }
    }
diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig
index e2552a8b4f..d753fe29f4 100644
--- a/src/link/MachO/TextBlock.zig
+++ b/src/link/MachO/TextBlock.zig
@@ -1183,9 +1183,22 @@ pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
            },
            .undef => {
                const atom = macho_file.stubs_map.get(rel.where_index) orelse {
+                    // TODO this is required for incremental when we don't have every symbol
+                    // resolved when creating relocations. In this case, we will insert a branch
+                    // reloc to an undef symbol which may happen to be defined within the binary.
+                    // Then, the undef we point at will be a null symbol (free symbol) which we
+                    // should remove/repurpose. To circumvent this (for now), we check if the symbol
+                    // we point to is garbage, and if so we fall back to symbol resolver to find by name.
+                    const n_strx = macho_file.undefs.items[rel.where_index].n_strx;
+                    if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: {
+                        if (resolv.where != .global) break :inner;
+                        break :blk macho_file.globals.items[resolv.where_index].n_value;
+                    }
+                    // TODO verify in TextBlock that the symbol is indeed dynamically bound.
                    break :blk 0; // Dynamically bound by dyld.
                };
 
+                break :blk macho_file.locals.items[atom.local_sym_index].n_value;
            },
        }
 
diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig
index 6e75af08c4..b50ce95acf 100644
--- a/src/link/MachO/commands.zig
+++ b/src/link/MachO/commands.zig
@@ -337,7 +337,7 @@ pub const SegmentCommand = struct {
        return null;
    }
 
-    pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u16, start: ?u64) u64 {
+    pub fn findFreeSpace(self: SegmentCommand, object_size: u64, min_alignment: u32, start: ?u64) u64 {
        var st: u64 = if (start) |v| v else self.inner.fileoff;
        while (self.detectAllocCollision(st, object_size)) |item_end| {
            st = mem.alignForwardGeneric(u64, item_end, min_alignment);
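
For context, the `zig build-exe hello.zig add.o` command in the commit message
assumes a Zig file that calls into the pre-compiled C object. Neither file ships
with this patch; a minimal, hypothetical pairing could look like this:

```
// hello.zig -- hypothetical companion to the commit message example; not part of this patch.
const std = @import("std");

// Assumed to be provided by add.o, e.g. built from a one-line C file
// `int add(int a, int b) { return a + b; }` with `zig cc -c add.c -o add.o`.
extern fn add(a: c_int, b: c_int) c_int;

pub fn main() void {
    std.debug.print("2 + 3 = {d}\n", .{add(2, 3)});
}
```

With this change, the atoms parsed out of `add.o` flow through `parseTextBlocks`
and the new `allocateAtom` path above before the linker writes them out.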