macho: some docs

This commit is contained in:
Jakub Konka 2022-10-22 14:53:53 +02:00
parent 086cee844a
commit 593b75b109
4 changed files with 54 additions and 16 deletions

View File

@ -1,3 +1,7 @@
//! Represents an input relocatable Object file.
//! Each Object is fully loaded into memory for easier
//! access to the different data within it.
const Object = @This();
const std = @import("std");
@ -278,6 +282,9 @@ fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) b
return lhs.header.addr < rhs.header.addr;
}
/// Splits input sections into Atoms.
/// If the Object was compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`, splits each section
/// into subsections, where each subsection then represents an Atom.
pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void {
const gpa = zld.gpa;
@ -514,6 +521,7 @@ pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 {
return symtab[mapped_index];
}
/// Expects an arena allocator.
/// Caller owns memory.
pub fn createReverseSymbolLookup(self: Object, arena: Allocator) ![]u32 {
const symtab = self.in_symtab orelse return &[0]u32{};

View File

@ -1,3 +1,9 @@
//! An Atom is the smallest unit that gets allocated
//! a virtual memory address in the final linked image.
//! For example, we parse each input section within an input relocatable
//! object file into a set of atoms which are then laid out contiguously
//! as they were defined in the input file.
const Atom = @This();
const std = @import("std");
@ -17,33 +23,38 @@ const Object = @import("Object.zig");
const SymbolWithLoc = @import("zld.zig").SymbolWithLoc;
const Zld = @import("zld.zig").Zld;
/// Each decl always gets a local symbol with the fully qualified name.
/// The vaddr and size are found here directly.
/// The file offset is found by computing the vaddr offset from the section vaddr
/// the symbol references, and adding that to the file offset of the section.
/// If this field is 0, it means the codegen size = 0 and there is no symbol or
/// offset table entry.
/// Each Atom always gets a symbol with the fully qualified name.
/// The symbol can reside in any object file context structure in `symtab` array
/// (see `Object`), or if the symbol is a synthetic symbol such as a GOT cell or
/// a stub trampoline, it can be found in the linker's `locals` arraylist.
sym_index: u32,
/// If this Atom references a subsection in an Object file, `nsyms_trailing`
/// tells how many symbols trailing `sym_index` fall within this Atom's address
/// range.
/// -1 means an Atom is a synthetic Atom such as a GOT cell defined by the linker.
/// Otherwise, it is the index into appropriate object file.
/// Prefer using `getFile()` helper to get the file index out rather than using
/// the field directly.
file: i32,
/// If this Atom is not a synthetic Atom, i.e., references a subsection in an
/// Object file, `inner_sym_index` and `inner_nsyms_trailing` tell where and if
/// this Atom contains any additional symbol references that fall within this Atom's
/// address range. These could for example be an alias symbol which can be used
/// internally by the relocation records, or if the Object file couldn't be split
/// into subsections, this Atom may encompass an entire input section.
inner_sym_index: u32,
inner_nsyms_trailing: u32,
/// -1 means symbol defined by the linker.
/// Otherwise, it is the index into appropriate object file.
file: i32,
/// Size and alignment of this atom
/// Unlike in Elf, we need to store the size of this symbol as part of
/// the atom since macho.nlist_64 lacks this information.
/// Size of this atom.
size: u64,
/// Alignment of this atom as a power of 2.
/// For instance, an alignment of 0 should be read as 2^0 = 1 byte aligned.
alignment: u32,
/// Cached index and length into the relocations records array that correspond to
/// this Atom and need to be resolved before the Atom can be committed into the
/// final linked image.
/// Do not use these fields directly. Instead, use `getAtomRelocs()` helper.
cached_relocs_start: i32,
cached_relocs_len: u32,
@ -64,6 +75,8 @@ pub const empty = Atom{
.next_index = null,
};
/// Returns `null` if the Atom is a synthetic Atom.
/// Otherwise, returns an index into an array of Objects.
pub inline fn getFile(self: Atom) ?u31 {
if (self.file == -1) return null;
return @intCast(u31, self.file);
@ -90,6 +103,8 @@ const InnerSymIterator = struct {
}
};
/// Returns an iterator over potentially contained symbols.
/// Panics when called on a synthetic Atom.
pub fn getInnerSymbolsIterator(zld: *Zld, atom_index: AtomIndex) InnerSymIterator {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null);
@ -100,6 +115,10 @@ pub fn getInnerSymbolsIterator(zld: *Zld, atom_index: AtomIndex) InnerSymIterato
};
}
/// Returns a section alias symbol if one is defined.
/// An alias symbol is used to represent the start of an input section
/// if there were no symbols defined within that range.
/// Alias symbols are only used on x86_64.
pub fn getSectionAlias(zld: *Zld, atom_index: AtomIndex) ?SymbolWithLoc {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null);
@ -119,6 +138,8 @@ pub fn getSectionAlias(zld: *Zld, atom_index: AtomIndex) ?SymbolWithLoc {
return null;
}
/// Given the index of a symbol contained within this Atom, calculates the
/// symbol's offset relative to the start of this Atom.
pub fn calcInnerSymbolOffset(zld: *Zld, atom_index: AtomIndex, sym_index: u32) u64 {
const atom = zld.getAtom(atom_index);
assert(atom.getFile() != null);

View File

@ -1,3 +1,5 @@
//! An algorithm for dead stripping of unreferenced Atoms.
const std = @import("std");
const assert = std.debug.assert;
const log = std.log.scoped(.dead_strip);

View File

@ -1,3 +1,10 @@
//! An algorithm for allocating the output machine code section (aka `__TEXT,__text`)
//! and inserting range-extending thunks. As such, this algorithm is only run
//! for a target that requires range extenders such as arm64.
//!
//! The algorithm works pessimistically and assumes that any reference to an Atom in
//! another output section is out of range.
const std = @import("std");
const assert = std.debug.assert;
const log = std.log.scoped(.thunks);