zld: refactor object and archive parsing

This commit is contained in:
Jakub Konka 2021-03-26 21:39:07 +01:00
parent 988b184d03
commit b0105029ca
3 changed files with 241 additions and 184 deletions

View File

@ -9,16 +9,14 @@ const mem = std.mem;
const Allocator = mem.Allocator;
const Object = @import("Object.zig");
const parseName = @import("Zld.zig").parseName;
usingnamespace @import("commands.zig");
allocator: *Allocator,
file: fs.File,
header: ar_hdr,
name: []u8,
objects: std.ArrayListUnmanaged(Object) = .{},
arch: ?std.Target.Cpu.Arch = null,
file: ?fs.File = null,
header: ?ar_hdr = null,
name: ?[]u8 = null,
/// Parsed table of contents.
/// Each symbol name points to a list of all definition
@ -29,14 +27,14 @@ toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{},
// `struct ar_hdr', and as many bytes of member file data as its `ar_size'
// member indicates, for each member file.
/// String that begins an archive file.
const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
pub const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
/// Size of that string.
const SARMAG: u4 = 8;
pub const SARMAG: u4 = 8;
/// String in ar_fmag at the end of each header.
const ARFMAG: *const [2:0]u8 = "`\n";
pub const ARFMAG: *const [2:0]u8 = "`\n";
const ar_hdr = extern struct {
pub const ar_hdr = extern struct {
/// Member file name, sometimes / terminated.
ar_name: [16]u8,
@ -87,64 +85,91 @@ const ar_hdr = extern struct {
}
};
/// Creates an `Archive` that remembers `allocator` for later allocations.
/// Every other field is left at its declared default; nothing is allocated
/// or opened here.
pub fn init(allocator: *Allocator) Archive {
    return Archive{ .allocator = allocator };
}
pub fn deinit(self: *Archive) void {
self.allocator.free(self.name);
for (self.objects.items) |*object| {
object.deinit();
}
self.objects.deinit(self.allocator);
for (self.toc.items()) |*entry| {
self.allocator.free(entry.key);
entry.value.deinit(self.allocator);
}
self.toc.deinit(self.allocator);
if (self.name) |n| {
self.allocator.free(n);
}
}
/// Caller owns the returned Archive instance and is responsible for calling
/// `deinit` to free allocated memory.
pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, ar_name: []const u8, file: fs.File) !Archive {
var reader = file.reader();
var magic = try readMagic(allocator, reader);
defer allocator.free(magic);
/// Closes the archive's file handle when one is set; does nothing otherwise.
pub fn closeFile(self: Archive) void {
    const handle = self.file orelse return;
    handle.close();
}
if (!mem.eql(u8, magic, ARMAG)) {
// Reset file cursor.
try file.seekTo(0);
return error.NotArchive;
pub fn parse(self: *Archive) !void {
var reader = self.file.?.reader();
const magic = try reader.readBytesNoEof(SARMAG);
if (!mem.eql(u8, &magic, ARMAG)) {
log.err("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic });
return error.MalformedArchive;
}
const header = try reader.readStruct(ar_hdr);
self.header = try reader.readStruct(ar_hdr);
if (!mem.eql(u8, &header.ar_fmag, ARFMAG))
if (!mem.eql(u8, &self.header.?.ar_fmag, ARFMAG)) {
log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.?.ar_fmag });
return error.MalformedArchive;
}
var embedded_name = try getName(allocator, header, reader);
log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, ar_name });
defer allocator.free(embedded_name);
var name = try allocator.dupe(u8, ar_name);
var self = Archive{
.allocator = allocator,
.file = file,
.header = header,
.name = name,
};
var embedded_name = try parseName(self.allocator, self.header.?, reader);
log.warn("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name.? });
defer self.allocator.free(embedded_name);
try self.parseTableOfContents(reader);
return self;
try reader.context.seekTo(0);
}
/// Returns a freshly allocated copy of the member name described by `header`.
///
/// When the header stores the name inline, that name is duplicated as-is.
/// When it stores a length instead, exactly that many bytes are read from
/// `reader` and the result is cut at the first NUL byte, if there is one.
///
/// Caller owns the returned slice and must free it with `allocator`.
fn parseName(allocator: *Allocator, header: ar_hdr, reader: anytype) ![]u8 {
    switch (try header.nameOrLength()) {
        .Name => |inline_name| return allocator.dupe(u8, inline_name),
        .Length => |len| {
            // Scratch buffer for the raw name bytes; released once the
            // trimmed copy below has been made.
            const raw = try allocator.alloc(u8, len);
            defer allocator.free(raw);
            try reader.readNoEof(raw);
            const end = mem.indexOfScalar(u8, raw, @as(u8, 0)) orelse raw.len;
            return allocator.dupe(u8, raw[0..end]);
        },
    }
}
fn parseTableOfContents(self: *Archive, reader: anytype) !void {
const symtab_size = try reader.readIntLittle(u32);
var symtab = try self.allocator.alloc(u8, symtab_size);
defer self.allocator.free(symtab);
try reader.readNoEof(symtab);
reader.readNoEof(symtab) catch {
log.err("incomplete symbol table: expected symbol table of length 0x{x}", .{symtab_size});
return error.MalformedArchive;
};
const strtab_size = try reader.readIntLittle(u32);
var strtab = try self.allocator.alloc(u8, strtab_size);
defer self.allocator.free(strtab);
try reader.readNoEof(strtab);
reader.readNoEof(strtab) catch {
log.err("incomplete symbol table: expected string table of length 0x{x}", .{strtab_size});
return error.MalformedArchive;
};
var symtab_stream = std.io.fixedBufferStream(symtab);
var symtab_reader = symtab_stream.reader();
@ -169,85 +194,29 @@ fn parseTableOfContents(self: *Archive, reader: anytype) !void {
}
}
fn readObject(self: *Archive, arch: std.Target.Cpu.Arch, ar_name: []const u8, reader: anytype) !void {
/// Caller owns the Object instance.
pub fn parseObject(self: Archive, offset: u32) !Object {
var reader = self.file.?.reader();
try reader.context.seekTo(offset);
const object_header = try reader.readStruct(ar_hdr);
if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG))
if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG)) {
log.err("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, object_header.ar_fmag });
return error.MalformedArchive;
var object_name = try getName(self.allocator, object_header, reader);
log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name });
const offset = @intCast(u32, try reader.context.getPos());
const header = try reader.readStruct(macho.mach_header_64);
const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
macho.CPU_TYPE_ARM64 => .aarch64,
macho.CPU_TYPE_X86_64 => .x86_64,
else => |value| {
log.err("unsupported cpu architecture 0x{x}", .{value});
return error.UnsupportedCpuArchitecture;
},
};
if (this_arch != arch) {
log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
return error.MismatchedCpuArchitecture;
}
// TODO Implement std.fs.File.clone() or similar.
var new_file = try fs.cwd().openFile(ar_name, .{});
var object = Object{
.allocator = self.allocator,
.name = object_name,
.ar_name = try mem.dupe(self.allocator, u8, ar_name),
.file = new_file,
.header = header,
};
const object_name = try parseName(self.allocator, object_header, reader);
log.warn("extracting object '{s}' from archive '{s}'", .{ object_name, self.name.? });
try object.readLoadCommands(reader, .{ .offset = offset });
var object = Object.init(self.allocator);
object.arch = self.arch.?;
object.file = try fs.cwd().openFile(self.name.?, .{});
object.name = object_name;
object.file_offset = @intCast(u32, try reader.context.getPos());
try object.parse();
if (object.symtab_cmd_index != null) {
try object.readSymtab();
try object.readStrtab();
}
try reader.context.seekTo(0);
if (object.data_in_code_cmd_index != null) try object.readDataInCode();
log.debug("\n\n", .{});
log.debug("{s} defines symbols", .{object.name});
for (object.symtab.items) |sym| {
const symname = object.getString(sym.n_strx);
log.debug("'{s}': {}", .{ symname, sym });
}
try self.objects.append(self.allocator, object);
}
fn readMagic(allocator: *Allocator, reader: anytype) ![]u8 {
var magic = std.ArrayList(u8).init(allocator);
try magic.ensureCapacity(SARMAG);
var i: usize = 0;
while (i < SARMAG) : (i += 1) {
const next = try reader.readByte();
magic.appendAssumeCapacity(next);
}
return magic.toOwnedSlice();
}
fn getName(allocator: *Allocator, header: ar_hdr, reader: anytype) ![]u8 {
const name_or_length = try header.nameOrLength();
var name: []u8 = undefined;
switch (name_or_length) {
.Name => |n| {
name = try allocator.dupe(u8, n);
},
.Length => |len| {
var n = try allocator.alloc(u8, len);
defer allocator.free(n);
try reader.readNoEof(n);
const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len;
name = try allocator.dupe(u8, n[0..actual_len]);
},
}
return name;
return object;
}

View File

@ -15,11 +15,11 @@ const parseName = @import("Zld.zig").parseName;
usingnamespace @import("commands.zig");
allocator: *Allocator,
file: fs.File,
name: []u8,
ar_name: ?[]u8 = null,
header: macho.mach_header_64,
arch: ?std.Target.Cpu.Arch = null,
header: ?macho.mach_header_64 = null,
file: ?fs.File = null,
file_offset: ?u32 = null,
name: ?[]u8 = null,
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
@ -42,6 +42,12 @@ strtab: std.ArrayListUnmanaged(u8) = .{},
data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
/// Creates an `Object` that remembers `allocator` for later allocations.
/// Every other field is left at its declared default; nothing is allocated
/// or opened here.
pub fn init(allocator: *Allocator) Object {
    return Object{ .allocator = allocator };
}
pub fn deinit(self: *Object) void {
for (self.load_commands.items) |*lc| {
lc.deinit(self.allocator);
@ -50,25 +56,32 @@ pub fn deinit(self: *Object) void {
self.symtab.deinit(self.allocator);
self.strtab.deinit(self.allocator);
self.data_in_code_entries.deinit(self.allocator);
self.allocator.free(self.name);
if (self.ar_name) |v| {
self.allocator.free(v);
if (self.name) |n| {
self.allocator.free(n);
}
}
/// Caller owns the returned Object instance and is responsible for calling
/// `deinit` to free allocated memory.
pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, name: []const u8, file: fs.File) !Object {
var reader = file.reader();
const header = try reader.readStruct(macho.mach_header_64);
/// Closes the object's file handle when one is set; does nothing otherwise.
pub fn closeFile(self: Object) void {
    const handle = self.file orelse return;
    handle.close();
}
if (header.filetype != macho.MH_OBJECT) {
// Reset file cursor.
try file.seekTo(0);
return error.NotObject;
pub fn parse(self: *Object) !void {
var reader = self.file.?.reader();
if (self.file_offset) |offset| {
try reader.context.seekTo(offset);
}
const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
self.header = try reader.readStruct(macho.mach_header_64);
if (self.header.?.filetype != macho.MH_OBJECT) {
log.err("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, self.header.?.filetype });
return error.MalformedObject;
}
const this_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) {
macho.CPU_TYPE_ARM64 => .aarch64,
macho.CPU_TYPE_X86_64 => .x86_64,
else => |value| {
@ -76,35 +89,22 @@ pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, name: []co
return error.UnsupportedCpuArchitecture;
},
};
if (this_arch != arch) {
log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
if (this_arch != self.arch.?) {
log.err("mismatched cpu architecture: expected {s}, found {s}", .{ self.arch.?, this_arch });
return error.MismatchedCpuArchitecture;
}
var self = Object{
.allocator = allocator,
.name = try allocator.dupe(u8, name),
.file = file,
.header = header,
};
try self.readLoadCommands(reader, .{});
try self.readLoadCommands(reader);
if (self.symtab_cmd_index != null) try self.parseSymtab();
if (self.data_in_code_cmd_index != null) try self.readDataInCode();
return self;
}
pub const ReadOffset = struct {
offset: ?u32 = null,
};
pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !void {
const offset_mod = offset.offset orelse 0;
try self.load_commands.ensureCapacity(self.allocator, self.header.ncmds);
pub fn readLoadCommands(self: *Object, reader: anytype) !void {
const offset = self.file_offset orelse 0;
try self.load_commands.ensureCapacity(self.allocator, self.header.?.ncmds);
var i: u16 = 0;
while (i < self.header.ncmds) : (i += 1) {
while (i < self.header.?.ncmds) : (i += 1) {
var cmd = try LoadCommand.read(self.allocator, reader);
switch (cmd.cmd()) {
macho.LC_SEGMENT_64 => {
@ -132,17 +132,18 @@ pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !voi
}
}
sect.offset += offset_mod;
if (sect.reloff > 0)
sect.reloff += offset_mod;
sect.offset += offset;
if (sect.reloff > 0) {
sect.reloff += offset;
}
}
seg.inner.fileoff += offset_mod;
seg.inner.fileoff += offset;
},
macho.LC_SYMTAB => {
self.symtab_cmd_index = i;
cmd.Symtab.symoff += offset_mod;
cmd.Symtab.stroff += offset_mod;
cmd.Symtab.symoff += offset;
cmd.Symtab.stroff += offset;
},
macho.LC_DYSYMTAB => {
self.dysymtab_cmd_index = i;
@ -152,7 +153,7 @@ pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !voi
},
macho.LC_DATA_IN_CODE => {
self.data_in_code_cmd_index = i;
cmd.LinkeditData.dataoff += offset_mod;
cmd.LinkeditData.dataoff += offset;
},
else => {
log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
@ -168,7 +169,7 @@ pub fn parseSymtab(self: *Object) !void {
var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
defer self.allocator.free(symtab);
_ = try self.file.preadAll(symtab, symtab_cmd.symoff);
_ = try self.file.?.preadAll(symtab, symtab_cmd.symoff);
try self.symtab.ensureCapacity(self.allocator, symtab_cmd.nsyms);
var stream = std.io.fixedBufferStream(symtab);
@ -187,7 +188,7 @@ pub fn parseSymtab(self: *Object) !void {
var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize);
defer self.allocator.free(strtab);
_ = try self.file.preadAll(strtab, symtab_cmd.stroff);
_ = try self.file.?.preadAll(strtab, symtab_cmd.stroff);
try self.strtab.appendSlice(self.allocator, strtab);
}
@ -200,7 +201,7 @@ pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
const sect = seg.sections.items[index];
var buffer = try allocator.alloc(u8, sect.size);
_ = try self.file.preadAll(buffer, sect.offset);
_ = try self.file.?.preadAll(buffer, sect.offset);
return buffer;
}
@ -211,7 +212,7 @@ pub fn readDataInCode(self: *Object) !void {
var buffer = try self.allocator.alloc(u8, data_in_code.datasize);
defer self.allocator.free(buffer);
_ = try self.file.preadAll(buffer, data_in_code.dataoff);
_ = try self.file.?.preadAll(buffer, data_in_code.dataoff);
var stream = io.fixedBufferStream(buffer);
var reader = stream.reader();

View File

@ -225,14 +225,14 @@ pub fn deinit(self: *Zld) void {
self.undefs.deinit(self.allocator);
}
pub fn closeFiles(self: *Zld) void {
for (self.objects.items) |*object| {
object.file.close();
pub fn closeFiles(self: Zld) void {
for (self.objects.items) |object| {
object.closeFile();
}
for (self.archives.items) |*archive| {
archive.file.close();
for (self.archives.items) |archive| {
archive.closeFile();
}
if (self.file) |*f| f.close();
if (self.file) |f| f.close();
}
pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
@ -272,6 +272,7 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
try self.populateMetadata();
try self.parseInputFiles(files);
try self.resolveSymbols();
self.printSymtab();
// try self.sortSections();
// try self.allocateTextSegment();
@ -284,31 +285,76 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
}
fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
const Input = struct {
kind: enum {
object,
archive,
},
file: fs.File,
name: []const u8,
};
var classified = std.ArrayList(Input).init(self.allocator);
defer classified.deinit();
// First, classify input files as either object or archive.
for (files) |file_name| {
const file = try fs.cwd().openFile(file_name, .{});
try_object: {
var object = Object.initFromFile(self.allocator, self.arch.?, file_name, file) catch |err| switch (err) {
error.NotObject => break :try_object,
else => |e| return e,
};
const index = @intCast(u16, self.objects.items.len);
try self.objects.append(self.allocator, object);
try self.resolveSymbols(index);
const header = try file.reader().readStruct(macho.mach_header_64);
if (header.filetype != macho.MH_OBJECT) {
try file.seekTo(0);
break :try_object;
}
try file.seekTo(0);
try classified.append(.{
.kind = .object,
.file = file,
.name = file_name,
});
continue;
}
try_archive: {
var archive = Archive.initFromFile(self.allocator, self.arch.?, file_name, file) catch |err| switch (err) {
error.NotArchive => break :try_archive,
else => |e| return e,
};
try self.archives.append(self.allocator, archive);
const magic = try file.reader().readBytesNoEof(Archive.SARMAG);
if (!mem.eql(u8, &magic, Archive.ARMAG)) {
try file.seekTo(0);
break :try_archive;
}
try file.seekTo(0);
try classified.append(.{
.kind = .archive,
.file = file,
.name = file_name,
});
continue;
}
log.err("unexpected file type: expected object '.o' or archive '.a': {s}", .{file_name});
return error.UnexpectedInputFileType;
log.warn("unexpected input file of unknown type '{s}'", .{file_name});
}
// Based on our classification, proceed with parsing.
for (classified.items) |input| {
switch (input.kind) {
.object => {
var object = Object.init(self.allocator);
object.arch = self.arch.?;
object.name = try self.allocator.dupe(u8, input.name);
object.file = input.file;
try object.parse();
try self.objects.append(self.allocator, object);
},
.archive => {
var archive = Archive.init(self.allocator);
archive.arch = self.arch.?;
archive.name = try self.allocator.dupe(u8, input.name);
archive.file = input.file;
try archive.parse();
try self.archives.append(self.allocator, archive);
},
}
}
}
@ -1153,7 +1199,7 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void {
try self.file.?.pwriteAll(code, stub_off);
}
fn resolveSymbols(self: *Zld, object_id: u16) !void {
fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void {
const object = self.objects.items[object_id];
log.warn("resolving symbols in '{s}'", .{object.name});
@ -1200,6 +1246,47 @@ fn resolveSymbols(self: *Zld, object_id: u16) !void {
}
}
/// Resolves symbols for the whole link.
///
/// First, symbols are resolved in every object already present in
/// `self.objects`. Then the archives are visited in order: while any name in
/// `self.undefs` appears in the current archive's table of contents, the
/// object at the first recorded offset is parsed, appended to `self.objects`
/// and resolved in turn. An archive is left behind only after a full scan of
/// `self.undefs` produced no hit in it.
///
/// Does nothing beyond the first pass when no archives were provided.
fn resolveSymbols(self: *Zld) !void {
    // First pass, resolve symbols in provided objects.
    for (self.objects.items) |_, object_id| {
        try self.resolveSymbolsInObject(@intCast(u16, object_id));
    }

    // Second pass, pull objects out of archives on demand. Bounding the loop
    // by the archive count keeps an empty archive list from being indexed.
    var next: usize = 0;
    while (next < self.archives.items.len) {
        const archive = &self.archives.items[next];
        var hit: bool = false;

        for (self.undefs.items()) |entry| {
            // Check if the entry exists in a static archive.
            const offsets = archive.toc.get(entry.key) orelse continue;
            assert(offsets.items.len > 0);

            const object_id = @intCast(u16, self.objects.items.len);
            {
                var object = try archive.parseObject(offsets.items[0]);
                // Until the list takes ownership, release the object if the
                // append fails so its name and file handle do not leak.
                errdefer {
                    object.closeFile();
                    object.deinit();
                }
                try self.objects.append(self.allocator, object);
            }
            try self.resolveSymbolsInObject(object_id);

            // Resolving may have changed `self.undefs`, so stop iterating it
            // and rescan the same archive from the start.
            hit = true;
            break;
        }

        // Nothing left to pull from this archive: move on to the next one.
        if (!hit) next += 1;
    }
}
fn doRelocs(self: *Zld) !void {
for (self.objects.items) |object, object_id| {
log.debug("\n\n", .{});