macho: clean up export trie generation and parsing

ExportTrie is now usable in larger linking contexts, such as linking multiple object files together or relinking dylibs.
Jakub Konka 2020-12-08 16:52:50 +01:00
parent ae3fd86dcc
commit 4c3e6c5bff
3 changed files with 263 additions and 115 deletions


@@ -1333,6 +1333,15 @@ pub const N_WEAK_DEF: u16 = 0x80;
/// This bit is only available in .o files (MH_OBJECT filetype)
pub const N_SYMBOL_RESOLVER: u16 = 0x100;
// The following are used on the flags byte of a terminal node
// in the export information.
pub const EXPORT_SYMBOL_FLAGS_KIND_MASK: u8 = 0x03;
pub const EXPORT_SYMBOL_FLAGS_KIND_REGULAR: u8 = 0x00;
pub const EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL: u8 = 0x01;
pub const EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE: u8 = 0x02;
pub const EXPORT_SYMBOL_FLAGS_KIND_WEAK_DEFINITION: u8 = 0x04;
pub const EXPORT_SYMBOL_FLAGS_REEXPORT: u8 = 0x08;
pub const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: u8 = 0x10;
// Codesign consts and structs taken from:
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/kern/cs_blobs.h.auto.html
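
The low two bits select the symbol kind; re-export and stub-and-resolver (and the weak-definition bit) are independent flags outside the mask. A minimal sketch of decoding the kind field (the helper name is hypothetical; the constants are the ones added above):

```zig
const std = @import("std");
const macho = std.macho;

/// Hypothetical helper: classify the 2-bit kind field of a terminal
/// node's flags byte using the constants added above.
fn exportKindName(flags: u8) []const u8 {
    return switch (flags & macho.EXPORT_SYMBOL_FLAGS_KIND_MASK) {
        macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR => "regular",
        macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL => "thread-local",
        macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE => "absolute",
        else => "unknown", // 0x03 is not assigned a meaning
    };
}
```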


@@ -754,13 +754,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64);
const needed_size = @sizeOf(macho.linkedit_data_command);
if (needed_size + after_last_cmd_offset > text_section.offset) {
// TODO We are in the position to be able to increase the padding by moving all sections
// by the required offset, but this requires a little bit more thinking and bookkeeping.
// For now, return an error informing the user of the problem.
log.err("Not enough padding between load commands and start of __text section:\n", .{});
log.err("Offset after last load command: 0x{x}\n", .{after_last_cmd_offset});
log.err("Beginning of __text section: 0x{x}\n", .{text_section.offset});
log.err("Needed size: 0x{x}\n", .{needed_size});
std.log.err("Unable to extend padding between load commands and start of __text section.", .{});
std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size * alloc_num / alloc_den});
std.log.err("fall back to the system linker.", .{});
return error.NotEnoughPadding;
}
const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
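
For context, this error path fires when a new load command would spill into section data. A condensed sketch of the same check (function name hypothetical):

```zig
const macho = @import("std").macho;

/// Does one more load command of `cmd_size` bytes fit between the end
/// of the existing load commands and the first byte of __text?
fn headerHasRoom(sizeofcmds: u32, text_offset: u32, cmd_size: u32) bool {
    const after_last_cmd = sizeofcmds + @sizeOf(macho.mach_header_64);
    return after_last_cmd + cmd_size <= text_offset;
}
```

When it does not fit, the new message suggests `-headerpad`, which reserves extra padding after the load commands at link time.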
@@ -1799,38 +1795,36 @@ fn writeCodeSignature(self: *MachO) !void {
fn writeExportTrie(self: *MachO) !void {
if (self.global_symbols.items.len == 0) return;
var trie: Trie = .{};
defer trie.deinit(self.base.allocator);
var trie = Trie.init(self.base.allocator);
defer trie.deinit();
const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
for (self.global_symbols.items) |symbol| {
// TODO figure out if we should put all global symbols into the export trie
const name = self.getString(symbol.n_strx);
assert(symbol.n_value >= text_segment.inner.vmaddr);
try trie.put(self.base.allocator, .{
try trie.put(.{
.name = name,
.vmaddr_offset = symbol.n_value - text_segment.inner.vmaddr,
.export_flags = 0, // TODO work out creation of export flags
});
}
var buffer: std.ArrayListUnmanaged(u8) = .{};
defer buffer.deinit(self.base.allocator);
try trie.writeULEB128Mem(self.base.allocator, &buffer);
var buffer = try trie.writeULEB128Mem();
defer self.base.allocator.free(buffer);
const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
const export_size = @intCast(u32, mem.alignForward(buffer.items.len, @sizeOf(u64)));
const export_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64)));
dyld_info.export_off = self.linkedit_segment_next_offset.?;
dyld_info.export_size = export_size;
log.debug("writing export trie from 0x{x} to 0x{x}\n", .{ dyld_info.export_off, dyld_info.export_off + export_size });
if (export_size > buffer.items.len) {
if (export_size > buffer.len) {
// Pad out to align(8).
try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.export_off + export_size);
}
try self.base.file.?.pwriteAll(buffer.items, dyld_info.export_off);
try self.base.file.?.pwriteAll(buffer, dyld_info.export_off);
self.linkedit_segment_next_offset = dyld_info.export_off + dyld_info.export_size;
// Advance size of __LINKEDIT segment
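
All integers in the trie (terminal-info sizes, export flags, vmaddr offsets, and edge targets) are ULEB128-encoded. For reference, a minimal encoder; std.leb provides the matching reader used elsewhere in this commit:

```zig
const std = @import("std");

/// Minimal ULEB128 encoder: 7 bits per byte, continuation bit set on
/// all but the final byte.
fn writeUleb128(writer: anytype, value: u64) !void {
    var v = value;
    while (true) {
        const low = @truncate(u7, v);
        v >>= 7;
        if (v == 0) {
            try writer.writeByte(low); // final byte: continuation bit clear
            return;
        }
        try writer.writeByte(0x80 | @as(u8, low));
    }
}

test "ULEB128 encodes 0x1000 as { 0x80, 0x20 }" {
    var buf: [2]u8 = undefined;
    var fbs = std.io.fixedBufferStream(&buf);
    try writeUleb128(fbs.writer(), 0x1000);
    std.testing.expect(std.mem.eql(u8, fbs.getWritten(), &[_]u8{ 0x80, 0x20 }));
}
```

Note that 0x1000, the vmaddr offset given to `_main` in the tests below, encodes as 0x80, 0x20, which is exactly how the parse-test fixture ends.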
@@ -1917,7 +1911,9 @@ fn parseFromFile(self: *MachO, file: fs.File) !void {
switch (cmd.cmd()) {
macho.LC_SEGMENT_64 => {
const x = cmd.Segment;
if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) {
if (isSegmentOrSection(&x.inner.segname, "__PAGEZERO")) {
self.pagezero_segment_cmd_index = i;
} else if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) {
self.linkedit_segment_cmd_index = i;
} else if (isSegmentOrSection(&x.inner.segname, "__TEXT")) {
self.text_segment_cmd_index = i;
@@ -1926,16 +1922,48 @@ fn parseFromFile(self: *MachO, file: fs.File) !void {
self.text_section_index = @intCast(u16, j);
}
}
} else if (isSegmentOrSection(&x.inner.segname, "__DATA")) {
self.data_segment_cmd_index = i;
}
},
macho.LC_DYLD_INFO_ONLY => {
self.dyld_info_cmd_index = i;
},
macho.LC_SYMTAB => {
self.symtab_cmd_index = i;
},
macho.LC_DYSYMTAB => {
self.dysymtab_cmd_index = i;
},
macho.LC_LOAD_DYLINKER => {
self.dylinker_cmd_index = i;
},
macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => {
self.version_min_cmd_index = i;
},
macho.LC_SOURCE_VERSION => {
self.source_version_cmd_index = i;
},
macho.LC_MAIN => {
self.main_cmd_index = i;
},
macho.LC_LOAD_DYLIB => {
self.libsystem_cmd_index = i; // TODO This is incorrect; we'll fix it up later.
},
macho.LC_FUNCTION_STARTS => {
self.function_starts_cmd_index = i;
},
macho.LC_DATA_IN_CODE => {
self.data_in_code_cmd_index = i;
},
macho.LC_CODE_SIGNATURE => {
self.code_signature_cmd_index = i;
},
// TODO populate more MachO fields
else => {},
else => {
std.log.err("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
return error.UnknownLoadCommand;
},
}
self.load_commands.appendAssumeCapacity(cmd);
}
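
The LC_* dispatch above relies on each command self-describing its tag and length. A sketch of the raw iteration underneath (hypothetical helper; assumes `bytes` holds the header plus the full command area and is 8-byte aligned):

```zig
const std = @import("std");
const macho = std.macho;

fn forEachLoadCommand(bytes: []align(8) const u8) void {
    const header = @ptrCast(*const macho.mach_header_64, bytes.ptr).*;
    var offset: usize = @sizeOf(macho.mach_header_64);
    var i: u32 = 0;
    while (i < header.ncmds) : (i += 1) {
        const lc = @ptrCast(*const macho.load_command, @alignCast(@alignOf(macho.load_command), bytes.ptr + offset)).*;
        // lc.cmd is the LC_* tag switched on above; lc.cmdsize is the
        // full size of this command including its payload.
        offset += lc.cmdsize;
    }
}
```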


@@ -44,20 +44,23 @@ pub const Symbol = struct {
export_flags: u64,
};
const Edge = struct {
pub const Edge = struct {
from: *Node,
to: *Node,
label: []const u8,
label: []u8,
fn deinit(self: *Edge, alloc: *Allocator) void {
self.to.deinit(alloc);
alloc.destroy(self.to);
fn deinit(self: *Edge, allocator: *Allocator) void {
self.to.deinit();
allocator.destroy(self.to);
allocator.free(self.label);
self.from = undefined;
self.to = undefined;
self.label = undefined;
}
};
const Node = struct {
pub const Node = struct {
base: *Trie,
/// Export flags associated with this exported symbol (if any).
export_flags: ?u64 = null,
/// VM address offset with respect to the section this symbol is defined against (if any).
@@ -67,73 +70,97 @@ const Node = struct {
/// List of all edges originating from this node.
edges: std.ArrayListUnmanaged(Edge) = .{},
fn deinit(self: *Node, alloc: *Allocator) void {
fn deinit(self: *Node) void {
for (self.edges.items) |*edge| {
edge.deinit(alloc);
edge.deinit(self.base.allocator);
}
self.edges.deinit(alloc);
self.edges.deinit(self.base.allocator);
}
const PutResult = struct {
/// Node reached at this stage of `put` op.
node: *Node,
/// Count of newly inserted nodes at this stage of `put` op.
node_count: usize,
};
/// Inserts a new node starting from `self`.
fn put(self: *Node, alloc: *Allocator, label: []const u8, node_count: usize) !PutResult {
var curr_node_count = node_count;
fn put(self: *Node, label: []const u8) !*Node {
// Check for match with edges from this node.
for (self.edges.items) |*edge| {
const match = mem.indexOfDiff(u8, edge.label, label) orelse return PutResult{
.node = edge.to,
.node_count = curr_node_count,
};
const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
if (match == 0) continue;
if (match == edge.label.len) return edge.to.put(alloc, label[match..], curr_node_count);
if (match == edge.label.len) return edge.to.put(label[match..]);
// Found a match, need to splice up nodes.
// From: A -> B
// To: A -> C -> B
const mid = try alloc.create(Node);
mid.* = .{};
const to_label = edge.label;
const mid = try self.base.allocator.create(Node);
mid.* = .{ .base = self.base };
var to_label = try self.base.allocator.dupe(u8, edge.label[match..]);
self.base.allocator.free(edge.label);
const to_node = edge.to;
edge.to = mid;
edge.label = label[0..match];
curr_node_count += 1;
edge.label = try self.base.allocator.dupe(u8, label[0..match]);
self.base.node_count += 1;
try mid.edges.append(alloc, .{
try mid.edges.append(self.base.allocator, .{
.from = mid,
.to = to_node,
.label = to_label[match..],
.label = to_label,
});
if (match == label.len) {
return PutResult{ .node = to_node, .node_count = curr_node_count };
} else {
return mid.put(alloc, label[match..], curr_node_count);
}
return if (match == label.len) to_node else mid.put(label[match..]);
}
// Add a new node.
const node = try alloc.create(Node);
node.* = .{};
curr_node_count += 1;
const node = try self.base.allocator.create(Node);
node.* = .{ .base = self.base };
self.base.node_count += 1;
try self.edges.append(alloc, .{
try self.edges.append(self.base.allocator, .{
.from = self,
.to = node,
.label = label,
.label = try self.base.allocator.dupe(u8, label),
});
return PutResult{ .node = node, .node_count = curr_node_count };
return node;
}
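
A concrete splice using the reworked API: the second `put` splits the existing edge at the shared prefix, exactly the A -> C -> B rewrite described in the comment above (`gpa` is assumed to be an allocator in scope):

```zig
var trie = Trie.init(gpa);
defer trie.deinit();
try trie.put(.{ .name = "_start", .vmaddr_offset = 0, .export_flags = 0 });
// root --- "_start" ---> A
try trie.put(.{ .name = "_main", .vmaddr_offset = 0, .export_flags = 0 });
// The "_start" edge is spliced at the common prefix "_":
// root --- "_" ---> C --- "start" ---> A
//                    \--- "main" ----> B
```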
fn fromByteStream(self: *Node, stream: anytype) Trie.FromByteStreamError!void {
self.trie_offset = try stream.getPos();
var reader = stream.reader();
const node_size = try leb.readULEB128(u64, reader);
if (node_size > 0) {
self.export_flags = try leb.readULEB128(u64, reader);
// TODO Parse flags.
self.vmaddr_offset = try leb.readULEB128(u64, reader);
}
const nedges = try reader.readByte();
self.base.node_count += nedges;
var i: usize = 0;
while (i < nedges) : (i += 1) {
var label = blk: {
var label_buf = std.ArrayList(u8).init(self.base.allocator);
while (true) {
const next = try reader.readByte();
if (next == @as(u8, 0))
break;
try label_buf.append(next);
}
break :blk label_buf.toOwnedSlice();
};
const seek_to = try leb.readULEB128(u64, reader);
const cur_pos = try stream.getPos();
try stream.seekTo(seek_to);
var node = try self.base.allocator.create(Node);
node.* = .{ .base = self.base };
try node.fromByteStream(stream);
try self.edges.append(self.base.allocator, .{
.from = self,
.to = node,
.label = label,
});
try stream.seekTo(cur_pos);
}
}
/// This method should only be called *after* updateOffset has been called!
/// If this is not upheld, this method will panic.
fn writeULEB128Mem(self: Node, buffer: *std.ArrayListUnmanaged(u8)) !void {
fn writeULEB128Mem(self: Node, buffer: *std.ArrayList(u8)) !void {
assert(self.trie_offset != null); // You need to call updateOffset first.
if (self.vmaddr_offset) |offset| {
// Terminal node info: encode export flags and vmaddr offset of this symbol.
@@ -221,64 +248,95 @@ const Node = struct {
/// the count always starts at 1.
node_count: usize = 1,
/// The root node of the trie.
root: Node = .{},
root: ?Node = null,
allocator: *Allocator,
pub fn init(allocator: *Allocator) Trie {
return .{ .allocator = allocator };
}
/// Insert a symbol into the trie, updating the prefixes in the process.
/// This operation may change the layout of the trie by splicing edges in
/// certain circumstances.
pub fn put(self: *Trie, alloc: *Allocator, symbol: Symbol) !void {
const res = try self.root.put(alloc, symbol.name, 0);
self.node_count += res.node_count;
res.node.vmaddr_offset = symbol.vmaddr_offset;
res.node.export_flags = symbol.export_flags;
pub fn put(self: *Trie, symbol: Symbol) !void {
if (self.root == null) {
self.root = .{ .base = self };
}
const node = try self.root.?.put(symbol.name);
node.vmaddr_offset = symbol.vmaddr_offset;
node.export_flags = symbol.export_flags;
}
const FromByteStreamError = error{
OutOfMemory,
EndOfStream,
Overflow,
};
/// Parse the trie from a byte stream.
pub fn fromByteStream(self: *Trie, stream: anytype) FromByteStreamError!void {
if (self.root == null) {
self.root = .{ .base = self };
}
return self.root.?.fromByteStream(stream);
}
/// Write the trie to a buffer ULEB128 encoded.
pub fn writeULEB128Mem(self: *Trie, alloc: *Allocator, buffer: *std.ArrayListUnmanaged(u8)) !void {
var ordered_nodes: std.ArrayListUnmanaged(*Node) = .{};
defer ordered_nodes.deinit(alloc);
try ordered_nodes.ensureCapacity(alloc, self.node_count);
walkInOrder(&self.root, &ordered_nodes);
/// Caller owns the memory and needs to free it.
pub fn writeULEB128Mem(self: *Trie) ![]u8 {
var ordered_nodes = try self.nodes();
defer self.allocator.free(ordered_nodes);
var offset: usize = 0;
var more: bool = true;
while (more) {
offset = 0;
more = false;
for (ordered_nodes.items) |node| {
for (ordered_nodes) |node| {
const res = node.updateOffset(offset);
offset += res.node_size;
if (res.updated) more = true;
}
}
try buffer.ensureCapacity(alloc, buffer.items.len + offset);
for (ordered_nodes.items) |node| {
try node.writeULEB128Mem(buffer);
var buffer = std.ArrayList(u8).init(self.allocator);
try buffer.ensureCapacity(offset);
for (ordered_nodes) |node| {
try node.writeULEB128Mem(&buffer);
}
return buffer.toOwnedSlice();
}
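
The loop above runs to a fixed point because the encoding is self-referential: edge targets are stored as ULEB128 trie offsets, so when a node's offset crosses a 7-bit boundary (say, from 0x7f to 0x80) its encoding grows by a byte, which shifts every later node and may in turn grow further offsets. Iteration stops once a full pass leaves every offset unchanged.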
/// Walks the trie in DFS order gathering all nodes into a linear stream of nodes.
fn walkInOrder(node: *Node, list: *std.ArrayListUnmanaged(*Node)) void {
list.appendAssumeCapacity(node);
for (node.edges.items) |*edge| {
walkInOrder(edge.to, list);
pub fn nodes(self: *Trie) ![]*Node {
var ordered_nodes = std.ArrayList(*Node).init(self.allocator);
try ordered_nodes.ensureCapacity(self.node_count);
comptime const Fifo = std.fifo.LinearFifo(*Node, .{ .Static = std.math.maxInt(u8) });
var fifo = Fifo.init();
try fifo.writeItem(&self.root.?);
while (fifo.readItem()) |next| {
for (next.edges.items) |*edge| {
try fifo.writeItem(edge.to);
}
ordered_nodes.appendAssumeCapacity(next);
}
return ordered_nodes.toOwnedSlice();
}
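
Unlike the removed walkInOrder, nodes() yields a breadth-first ordering via the FIFO, so a parent is always emitted before its children. Note that the statically sized FIFO holds at most std.math.maxInt(u8) queued nodes; a trie fanning out wider than that would make writeItem return an error.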
pub fn deinit(self: *Trie, alloc: *Allocator) void {
self.root.deinit(alloc);
pub fn deinit(self: *Trie) void {
self.root.?.deinit();
}
test "Trie node count" {
var gpa = testing.allocator;
var trie: Trie = .{};
defer trie.deinit(gpa);
var trie = Trie.init(gpa);
defer trie.deinit();
testing.expectEqual(trie.node_count, 1);
try trie.put(gpa, .{
try trie.put(.{
.name = "_main",
.vmaddr_offset = 0,
.export_flags = 0,
@@ -286,14 +344,14 @@ test "Trie node count" {
testing.expectEqual(trie.node_count, 2);
// Inserting the same node shouldn't update the trie.
try trie.put(gpa, .{
try trie.put(.{
.name = "_main",
.vmaddr_offset = 0,
.export_flags = 0,
});
testing.expectEqual(trie.node_count, 2);
try trie.put(gpa, .{
try trie.put(.{
.name = "__mh_execute_header",
.vmaddr_offset = 0x1000,
.export_flags = 0,
@@ -301,13 +359,13 @@ test "Trie node count" {
testing.expectEqual(trie.node_count, 4);
// Inserting the same node shouldn't update the trie.
try trie.put(gpa, .{
try trie.put(.{
.name = "__mh_execute_header",
.vmaddr_offset = 0x1000,
.export_flags = 0,
});
testing.expectEqual(trie.node_count, 4);
try trie.put(gpa, .{
try trie.put(.{
.name = "_main",
.vmaddr_offset = 0,
.export_flags = 0,
@@ -317,31 +375,28 @@ test "Trie node count" {
test "Trie basic" {
var gpa = testing.allocator;
var trie: Trie = .{};
defer trie.deinit(gpa);
// root
testing.expect(trie.root.edges.items.len == 0);
var trie = Trie.init(gpa);
defer trie.deinit();
// root --- _st ---> node
try trie.put(gpa, .{
try trie.put(.{
.name = "_st",
.vmaddr_offset = 0,
.export_flags = 0,
});
testing.expect(trie.root.edges.items.len == 1);
testing.expect(mem.eql(u8, trie.root.edges.items[0].label, "_st"));
testing.expect(trie.root.?.edges.items.len == 1);
testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st"));
{
// root --- _st ---> node --- art ---> node
try trie.put(gpa, .{
try trie.put(.{
.name = "_start",
.vmaddr_offset = 0,
.export_flags = 0,
});
testing.expect(trie.root.edges.items.len == 1);
testing.expect(trie.root.?.edges.items.len == 1);
const nextEdge = &trie.root.edges.items[0];
const nextEdge = &trie.root.?.edges.items[0];
testing.expect(mem.eql(u8, nextEdge.label, "_st"));
testing.expect(nextEdge.to.edges.items.len == 1);
testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art"));
@@ -350,14 +405,14 @@ test "Trie basic" {
// root --- _ ---> node --- st ---> node --- art ---> node
// |
// | --- main ---> node
try trie.put(gpa, .{
try trie.put(.{
.name = "_main",
.vmaddr_offset = 0,
.export_flags = 0,
});
testing.expect(trie.root.edges.items.len == 1);
testing.expect(trie.root.?.edges.items.len == 1);
const nextEdge = &trie.root.edges.items[0];
const nextEdge = &trie.root.?.edges.items[0];
testing.expect(mem.eql(u8, nextEdge.label, "_"));
testing.expect(nextEdge.to.edges.items.len == 2);
testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st"));
@@ -370,24 +425,22 @@ test "Trie basic" {
test "Trie.writeULEB128Mem" {
var gpa = testing.allocator;
var trie: Trie = .{};
defer trie.deinit(gpa);
var trie = Trie.init(gpa);
defer trie.deinit();
try trie.put(gpa, .{
try trie.put(.{
.name = "__mh_execute_header",
.vmaddr_offset = 0,
.export_flags = 0,
});
try trie.put(gpa, .{
try trie.put(.{
.name = "_main",
.vmaddr_offset = 0x1000,
.export_flags = 0,
});
var buffer: std.ArrayListUnmanaged(u8) = .{};
defer buffer.deinit(gpa);
try trie.writeULEB128Mem(gpa, &buffer);
var buffer = try trie.writeULEB128Mem();
defer gpa.free(buffer);
const exp_buffer = [_]u8{
0x0,
@@ -434,6 +487,64 @@ test "Trie.writeULEB128Mem" {
0x0,
};
testing.expect(buffer.items.len == exp_buffer.len);
testing.expect(mem.eql(u8, buffer.items, exp_buffer[0..]));
testing.expect(buffer.len == exp_buffer.len);
testing.expect(mem.eql(u8, buffer, exp_buffer[0..]));
}
test "parse Trie from byte stream" {
var gpa = testing.allocator;
const in_buffer = [_]u8{
0x0,
0x1,
0x5f,
0x0,
0x5,
0x0,
0x2,
0x5f,
0x6d,
0x68,
0x5f,
0x65,
0x78,
0x65,
0x63,
0x75,
0x74,
0x65,
0x5f,
0x68,
0x65,
0x61,
0x64,
0x65,
0x72,
0x0,
0x21,
0x6d,
0x61,
0x69,
0x6e,
0x0,
0x25,
0x2,
0x0,
0x0,
0x0,
0x3,
0x0,
0x80,
0x20,
0x0,
};
var stream = std.io.fixedBufferStream(in_buffer[0..]);
var trie = Trie.init(gpa);
defer trie.deinit();
try trie.fromByteStream(&stream);
var out_buffer = try trie.writeULEB128Mem();
defer gpa.free(out_buffer);
testing.expect(mem.eql(u8, in_buffer[0..], out_buffer));
}
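
For reference, the fixture round-tripped above decodes as follows under the layout fromByteStream expects (per node: a ULEB128 byte count of terminal info; if nonzero, ULEB128 export flags then vmaddr offset; a one-byte edge count; then, per edge, a NUL-terminated label and the ULEB128 trie offset of the target node):

```zig
// offset  bytes                           meaning
//  0      0x00                            root: no terminal info
//  1      0x01                            root: one edge
//  2      0x5f 0x00                       label "_"
//  4      0x05                            -> child node at offset 5
//  5      0x00 0x02                       node "_": not terminal, two edges
//  7      "_mh_execute_header" 0x00 0x21  label, -> child at offset 0x21
// 27      "main" 0x00 0x25                label, -> child at offset 0x25
// 33      0x02 0x00 0x00 0x00             terminal: flags 0, vmaddr 0; no edges
// 37      0x03 0x00 0x80 0x20 0x00        terminal: flags 0, vmaddr 0x1000; no edges
```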