macho: do not assume every object has a symtab

For example, building stage2 requires an empty `empty.cc` source file
compiling which generates a valid translation unit with no symtab/strtab.
In this case, we cannot simply assume that every translation unit will
have a valid symtab; instead, we cautiously default the input symtab
and strtab fields to optional `null` to signal symtab's presence or its lack of.
In case the symtab is not present, we catch this fact when splitting
input sections into subsections and create a synthetic symbol per every
suitable section.
This commit is contained in:
Jakub Konka 2022-09-17 16:37:26 +02:00
parent dfcadd22bb
commit 5391541f11
2 changed files with 63 additions and 10 deletions

View File

@ -24,8 +24,11 @@ mtime: u64,
contents: []align(@alignOf(u64)) const u8,
header: macho.mach_header_64 = undefined,
in_symtab: []align(1) const macho.nlist_64 = undefined,
in_strtab: []const u8 = undefined,
/// Symtab and strtab might not exist for empty object files so we use an optional
/// to signal this.
in_symtab: ?[]align(1) const macho.nlist_64 = null,
in_strtab: ?[]const u8 = null,
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
sections: std.ArrayListUnmanaged(macho.section_64) = .{},
@ -105,7 +108,7 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
self.contents.ptr + symtab.symoff,
)[0..symtab.nsyms];
self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize];
try self.symtab.appendUnalignedSlice(allocator, self.in_symtab);
try self.symtab.appendUnalignedSlice(allocator, self.in_symtab.?);
},
else => {},
}
@ -243,6 +246,50 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32)
log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name });
const in_symtab = self.in_symtab orelse {
for (self.sections.items) |sect, id| {
if (sect.isDebug()) continue;
const match = (try macho_file.getOutputSection(sect)) orelse {
log.debug(" unhandled section", .{});
continue;
};
if (sect.size == 0) continue;
const sect_id = @intCast(u8, id);
const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: {
const sym_index = @intCast(u32, self.symtab.items.len);
try self.symtab.append(gpa, .{
.n_strx = 0,
.n_type = macho.N_SECT,
.n_sect = match + 1,
.n_desc = 0,
.n_value = sect.addr,
});
try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index);
break :blk sym_index;
};
const code: ?[]const u8 = if (!sect.isZerofill()) try self.getSectionContents(sect) else null;
const relocs = @ptrCast(
[*]align(1) const macho.relocation_info,
self.contents.ptr + sect.reloff,
)[0..sect.nreloc];
const atom = try self.createAtomFromSubsection(
macho_file,
object_id,
sym_index,
sect.size,
sect.@"align",
code,
relocs,
&.{},
match,
sect,
);
try macho_file.addAtomToSection(atom, match);
}
return;
};
// You would expect that the symbol table is at least pre-sorted based on symbol's type:
// local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
// the GO compiler does not necessarily respect that therefore we sort immediately by type
@ -250,10 +297,10 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32)
const context = Context{
.object = self,
};
var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, self.in_symtab.len);
var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, in_symtab.len);
defer sorted_all_syms.deinit();
for (self.in_symtab) |_, index| {
for (in_symtab) |_, index| {
sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) });
}
@ -282,6 +329,8 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32)
const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
for (self.sections.items) |sect, id| {
if (sect.isDebug()) continue;
const sect_id = @intCast(u8, id);
log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() });
@ -530,8 +579,9 @@ fn createAtomFromSubsection(
}
pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 {
if (index >= self.in_symtab.len) return null;
return self.in_symtab[index];
const symtab = self.in_symtab.?;
if (index >= symtab.len) return null;
return symtab[index];
}
pub fn getSourceSection(self: Object, index: u16) macho.section_64 {
@ -636,8 +686,9 @@ pub fn getSectionContents(self: Object, sect: macho.section_64) error{Overflow}!
}
pub fn getString(self: Object, off: u32) []const u8 {
assert(off < self.in_strtab.len);
return mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.ptr + off), 0);
const strtab = self.in_strtab.?;
assert(off < strtab.len);
return mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + off), 0);
}
pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom {

View File

@ -179,7 +179,9 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac
loop = false;
for (macho_file.objects.items) |object| {
for (object.in_symtab) |_, source_index| {
const in_symtab = object.in_symtab orelse continue;
for (in_symtab) |_, source_index| {
const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue;
if (alive.contains(atom)) continue;