macho: start upstreaming zld

This commit is contained in:
Jakub Konka 2021-02-23 23:45:36 +01:00
parent 0f7b036eb7
commit f52f23618d
9 changed files with 2950 additions and 0 deletions

View File

@ -447,6 +447,7 @@ pub const InitOptions = struct {
want_lto: ?bool = null,
use_llvm: ?bool = null,
use_lld: ?bool = null,
use_zld: ?bool = null,
use_clang: ?bool = null,
rdynamic: bool = false,
strip: bool = false,
@ -1020,6 +1021,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation {
.link_mode = link_mode,
.object_format = ofmt,
.optimize_mode = options.optimize_mode,
.use_zld = options.use_zld orelse false,
.use_lld = use_lld,
.use_llvm = use_llvm,
.system_linker_hack = darwin_options.system_linker_hack,

View File

@ -61,6 +61,8 @@ pub const Options = struct {
/// Darwin-only. If this is true, `use_llvm` is true, and `is_native_os` is true, this link code will
/// use the system linker `ld` instead of LLD.
system_linker_hack: bool,
/// Experimental Zig linker.
use_zld: bool,
link_libc: bool,
link_libcpp: bool,
function_sections: bool,

View File

@ -26,6 +26,7 @@ const target_util = @import("../target.zig");
const DebugSymbols = @import("MachO/DebugSymbols.zig");
const Trie = @import("MachO/Trie.zig");
const CodeSignature = @import("MachO/CodeSignature.zig");
const Zld = @import("MachO/Zld.zig");
usingnamespace @import("MachO/commands.zig");
@ -637,6 +638,31 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
var argv = std.ArrayList([]const u8).init(self.base.allocator);
defer argv.deinit();
if (true) {
// if (self.base.options.use_zld) {
var zld = Zld.init(self.base.allocator);
defer zld.deinit();
zld.arch = target.cpu.arch;
var input_files = std.ArrayList([]const u8).init(self.base.allocator);
defer input_files.deinit();
// Positional arguments to the linker such as object files.
try input_files.appendSlice(self.base.options.objects);
for (comp.c_object_table.items()) |entry| {
try input_files.append(entry.key.status.success.object_path);
}
if (module_obj_path) |p| {
try input_files.append(p);
}
try input_files.append(comp.compiler_rt_static_lib.?.full_object_path);
// libc++ dep
if (self.base.options.link_libcpp) {
try input_files.append(comp.libcxxabi_static_lib.?.full_object_path);
try input_files.append(comp.libcxx_static_lib.?.full_object_path);
}
return zld.link(input_files.items, full_out_path);
}
// TODO https://github.com/ziglang/zig/issues/6971
// Note that there is no need to check if running natively since we do that already
// when setting `system_linker_hack` in Compilation struct.

247
src/link/MachO/Archive.zig Normal file
View File

@ -0,0 +1,247 @@
const Archive = @This();
const std = @import("std");
const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.archive);
const macho = std.macho;
const mem = std.mem;
const Allocator = mem.Allocator;
const Object = @import("Object.zig");
const parseName = @import("Zld.zig").parseName;
usingnamespace @import("commands.zig");
/// Allocator that owns `name` and the `objects` list; also handed to every extracted object.
allocator: *Allocator,
/// Handle of the archive file; closed by `deinit`.
file: fs.File,
/// The first member header, read right after the archive magic in `initFromFile`.
header: ar_hdr,
/// Heap copy of the `ar_name` argument given to `initFromFile`; freed by `deinit`.
name: []u8,
/// Objects extracted by `readObject`, in the order they were read.
objects: std.ArrayListUnmanaged(Object) = .{},
// Archive files start with the ARMAG identifying string. Then follows a
// `struct ar_hdr`, and as many bytes of member file data as its `ar_size`
// member indicates, for each member file.
/// String that begins an archive file.
const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
/// Size of that string.
const SARMAG: u4 = 8;
/// String in ar_fmag at the end of each header.
const ARFMAG: *const [2:0]u8 = "`\n";
/// On-disk header that precedes every archive member. All fields are
/// fixed-width ASCII, right-padded with spaces.
const ar_hdr = extern struct {
    /// Member file name, sometimes / terminated.
    ar_name: [16]u8,

    /// File date, decimal seconds since Epoch.
    ar_date: [12]u8,

    /// User ID, in ASCII format.
    ar_uid: [6]u8,

    /// Group ID, in ASCII format.
    ar_gid: [6]u8,

    /// File mode, in ASCII octal.
    ar_mode: [8]u8,

    /// File size, in ASCII decimal.
    ar_size: [10]u8,

    /// Always contains ARFMAG.
    ar_fmag: [2]u8,

    /// Decoded form of `ar_name`: either the name itself, or the byte length
    /// of a name that is stored right after the header.
    const NameOrLength = union(enum) {
        Name: []const u8,
        Length: u64,
    };

    /// Decodes `ar_name`.
    ///
    /// Returns `.Name` when the only '/' is the last non-space character; the
    /// slice includes that '/' and points into `self.ar_name`, so it is valid
    /// only while the header is alive. Otherwise the digits after the first '/'
    /// are parsed as a decimal length and returned as `.Length`.
    ///
    /// Takes `self` by pointer on purpose: the returned slice aliases the
    /// header, and a slice into a by-value parameter could dangle.
    ///
    /// Errors: `error.MalformedArchive` if the field has no '/', plus any
    /// `std.fmt.parseInt` error for a bad length.
    pub fn nameOrLength(self: *const ar_hdr) !NameOrLength {
        const value = getValue(&self.ar_name);
        const slash_index = mem.indexOf(u8, value, "/") orelse return error.MalformedArchive;
        if (slash_index + 1 == value.len) {
            // Name stored directly
            return NameOrLength{ .Name = value };
        }
        // Name follows the header directly and its length is encoded in
        // the name field.
        const length = try std.fmt.parseInt(u64, value[slash_index + 1 ..], 10);
        return NameOrLength{ .Length = length };
    }

    /// Parses `ar_size` as a decimal number of bytes.
    pub fn size(self: ar_hdr) !u64 {
        return std.fmt.parseInt(u64, getValue(&self.ar_size), 10);
    }

    /// Strips the trailing space padding from a raw header field.
    fn getValue(raw: []const u8) []const u8 {
        return mem.trimRight(u8, raw, " ");
    }
};
/// Frees every object extracted from the archive, the object list itself and
/// the stored archive name, then closes the archive file.
pub fn deinit(self: *Archive) void {
    const gpa = self.allocator;

    var idx: usize = 0;
    while (idx < self.objects.items.len) : (idx += 1) {
        self.objects.items[idx].deinit();
    }
    self.objects.deinit(gpa);

    gpa.free(self.name);
    self.file.close();
}
/// Parses the archive in `file` and extracts its member objects.
///
/// Caller owns the returned Archive instance and is responsible for calling
/// `deinit` to free allocated memory (which also closes `file`).
///
/// `arch` is the CPU architecture every member object must match and
/// `ar_name` is the path of the archive; it is copied, and also reopened once
/// per extracted object.
///
/// Errors:
/// * `error.NotArchive` - the file does not start with the archive magic; the
///   file cursor is rewound to 0 first.
/// * `error.MalformedArchive` - the first header has a bad terminator.
/// * anything propagated from reading the table of contents or the objects.
///
/// On any error everything allocated here is released again; `file` itself is
/// never closed on the error path.
/// NOTE(review): presumably the caller keeps ownership of `file` on failure - confirm.
pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, ar_name: []const u8, file: fs.File) !Archive {
    var reader = file.reader();

    const magic = try readMagic(allocator, reader);
    defer allocator.free(magic);

    if (!mem.eql(u8, magic, ARMAG)) {
        // Reset file cursor.
        try file.seekTo(0);
        return error.NotArchive;
    }

    const header = try reader.readStruct(ar_hdr);
    if (!mem.eql(u8, &header.ar_fmag, ARFMAG))
        return error.MalformedArchive;

    const embedded_name = try getName(allocator, header, reader);
    defer allocator.free(embedded_name);
    log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, ar_name });

    var self = Archive{
        .allocator = allocator,
        .file = file,
        .header = header,
        .name = try allocator.dupe(u8, ar_name),
    };
    // Undo everything `self` has accumulated if a later step fails. This is
    // not `self.deinit()` because that would also close `file`.
    errdefer {
        for (self.objects.items) |*object| {
            object.deinit();
        }
        self.objects.deinit(allocator);
        allocator.free(self.name);
    }

    const object_offsets = try self.readTableOfContents(reader);
    defer allocator.free(object_offsets);

    // Entry 0 is the placeholder offset pushed by `readTableOfContents`, so
    // real objects start at index 1.
    var i: usize = 1;
    while (i < object_offsets.len) : (i += 1) {
        try reader.context.seekTo(object_offsets[i]);
        try self.readObject(arch, ar_name, reader);
    }

    return self;
}
/// Reads the archive's symbol table and string table from `reader` and
/// returns the distinct object offsets named by the symbol table.
///
/// The stream is expected to hold a little-endian u32 symbol-table size, that
/// many bytes of (u32 string index, u32 object offset) pairs, then a u32
/// string-table size and that many bytes of strings.
///
/// The result always starts with a placeholder entry `0`. After that an offset
/// is appended whenever it differs from the previously appended one. Caller
/// owns the returned slice (allocated with `self.allocator`).
fn readTableOfContents(self: *Archive, reader: anytype) ![]u32 {
    const symtab_size = try reader.readIntLittle(u32);
    const symtab = try self.allocator.alloc(u8, symtab_size);
    defer self.allocator.free(symtab);
    try reader.readNoEof(symtab);

    // The string table is consumed but not used yet.
    const strtab_size = try reader.readIntLittle(u32);
    const strtab = try self.allocator.alloc(u8, strtab_size);
    defer self.allocator.free(strtab);
    try reader.readNoEof(strtab);

    var symtab_stream = std.io.fixedBufferStream(symtab);
    var symtab_reader = symtab_stream.reader();

    var object_offsets = std.ArrayList(u32).init(self.allocator);
    // Do not leak the partial list when an append or a read below fails.
    errdefer object_offsets.deinit();
    try object_offsets.append(0);

    while (true) {
        const n_strx = symtab_reader.readIntLittle(u32) catch |err| switch (err) {
            error.EndOfStream => break,
            else => |e| return e,
        };
        _ = n_strx; // Symbol names are not needed here.
        const object_offset = try symtab_reader.readIntLittle(u32);

        // TODO Store the table of contents for later reuse.
        // Here, we assume that symbols are NOT sorted in any way, and
        // they point to objects in sequence.
        const previous = object_offsets.items[object_offsets.items.len - 1];
        if (previous != object_offset) {
            try object_offsets.append(object_offset);
        }
    }

    return object_offsets.toOwnedSlice();
}
/// Parses one archive member at the reader's current position as a Mach-O
/// object and appends it to `self.objects`.
///
/// `arch` is the architecture the member must have and `ar_name` is the path
/// of the archive, reopened so the new object gets its own file handle.
///
/// Errors:
/// * `error.MalformedArchive` - bad member header terminator.
/// * `error.UnsupportedCpuArchitecture` - cputype is neither ARM64 nor x86_64.
/// * `error.MismatchedCpuArchitecture` - the member's architecture differs from `arch`.
/// * anything propagated from reading the name, reopening the file or parsing
///   the load commands / symbol table / string table.
///
/// On error nothing is appended and everything acquired here (name, file
/// handle, partly parsed object) is released.
fn readObject(self: *Archive, arch: std.Target.Cpu.Arch, ar_name: []const u8, reader: anytype) !void {
    const object_header = try reader.readStruct(ar_hdr);
    if (!mem.eql(u8, &object_header.ar_fmag, ARFMAG))
        return error.MalformedArchive;

    // Position of the Mach-O header inside the archive; every file offset in
    // the object is later shifted by this amount.
    var offset: u32 = undefined;

    var object = blk: {
        const object_name = try getName(self.allocator, object_header, reader);
        // Until the `Object` below takes ownership, the name must be freed here.
        errdefer self.allocator.free(object_name);
        log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name });

        offset = @intCast(u32, try reader.context.getPos());
        const header = try reader.readStruct(macho.mach_header_64);

        const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
            macho.CPU_TYPE_ARM64 => .aarch64,
            macho.CPU_TYPE_X86_64 => .x86_64,
            else => |value| {
                log.err("unsupported cpu architecture 0x{x}", .{value});
                return error.UnsupportedCpuArchitecture;
            },
        };
        if (this_arch != arch) {
            log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
            return error.MismatchedCpuArchitecture;
        }

        // TODO Implement std.fs.File.clone() or similar.
        const new_file = try fs.cwd().openFile(ar_name, .{});

        break :blk Object{
            .allocator = self.allocator,
            .name = object_name,
            .file = new_file,
            .header = header,
        };
    };
    // From here on the object owns its name and file handle.
    errdefer object.deinit();

    try object.readLoadCommands(reader, .{ .offset = offset });
    try object.readSymtab();
    try object.readStrtab();

    log.debug("\n\n", .{});
    log.debug("{s} defines symbols", .{object.name});
    for (object.symtab.items) |sym| {
        const symname = object.getString(sym.n_strx);
        log.debug("'{s}': {}", .{ symname, sym });
    }

    try self.objects.append(self.allocator, object);
}
/// Reads the first `SARMAG` bytes from `reader` into a freshly allocated
/// buffer. Caller owns the returned slice.
///
/// Fails with `error.EndOfStream` if the stream is shorter than that; the
/// buffer is freed again in that case.
fn readMagic(allocator: *Allocator, reader: anytype) ![]u8 {
    const magic = try allocator.alloc(u8, SARMAG);
    errdefer allocator.free(magic);
    try reader.readNoEof(magic);
    return magic;
}
/// Returns a heap copy of the member name described by `header`.
///
/// If the name is stored in the header it is duplicated as-is. Otherwise the
/// header gives a length, and that many bytes are read from `reader`, which
/// must be positioned right after the header.
///
/// Caller owns the returned slice. If reading the name fails, the buffer is
/// freed before the error is returned.
fn getName(allocator: *Allocator, header: ar_hdr, reader: anytype) ![]u8 {
    switch (try header.nameOrLength()) {
        .Name => |n| return allocator.dupe(u8, n),
        .Length => |len| {
            const name = try allocator.alloc(u8, len);
            errdefer allocator.free(name);
            try reader.readNoEof(name);
            return name;
        },
    }
}

204
src/link/MachO/Object.zig Normal file
View File

@ -0,0 +1,204 @@
const Object = @This();
const std = @import("std");
const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.object);
const macho = std.macho;
const mem = std.mem;
const Allocator = mem.Allocator;
const parseName = @import("Zld.zig").parseName;
usingnamespace @import("commands.zig");
/// Allocator that owns `name`, the load commands, the symbol and string tables and `directory`.
allocator: *Allocator,
/// File the object's contents are read from; closed by `deinit`.
file: fs.File,
/// Name of the object; freed with `allocator` in `deinit`.
name: []u8,
/// Mach-O header of this object.
header: macho.mach_header_64,
/// Load commands, in the order `readLoadCommands` read them.
load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
// Indices into `load_commands`, set by `readLoadCommands` when the matching
// command is seen (a later command of the same kind overwrites the index).
segment_cmd_index: ?u16 = null,
symtab_cmd_index: ?u16 = null,
dysymtab_cmd_index: ?u16 = null,
build_version_cmd_index: ?u16 = null,
// Indices into the section list of an LC_SEGMENT_64 command, set by
// `readLoadCommands` for `__TEXT,__text` and the `__DWARF` debug sections.
text_section_index: ?u16 = null,
dwarf_debug_info_index: ?u16 = null,
dwarf_debug_abbrev_index: ?u16 = null,
dwarf_debug_str_index: ?u16 = null,
dwarf_debug_line_index: ?u16 = null,
dwarf_debug_ranges_index: ?u16 = null,
/// Symbol table entries loaded by `readSymtab`.
symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
/// Raw string table bytes loaded by `readStrtab`; looked up with `getString`.
strtab: std.ArrayListUnmanaged(u8) = .{},
/// Maps a (segment name, section name) pair to the section's index; filled by `readLoadCommands`.
directory: std.AutoHashMapUnmanaged(DirectoryKey, u16) = .{},
/// Key of `directory`: the raw 16-byte segment and section name fields, copied
/// verbatim from a section header.
pub const DirectoryKey = struct {
    segname: [16]u8,
    sectname: [16]u8,
};
/// Releases all memory held by the object (load commands, symbol table,
/// string table, section directory and name) and closes its file.
pub fn deinit(self: *Object) void {
    const gpa = self.allocator;

    var idx: usize = 0;
    while (idx < self.load_commands.items.len) : (idx += 1) {
        self.load_commands.items[idx].deinit(gpa);
    }
    self.load_commands.deinit(gpa);

    self.symtab.deinit(gpa);
    self.strtab.deinit(gpa);
    self.directory.deinit(gpa);

    gpa.free(self.name);
    self.file.close();
}
/// Parses `file` as a relocatable Mach-O object built for `arch`.
///
/// Caller owns the returned Object instance and is responsible for calling
/// `deinit` to free allocated memory (which also closes `file`). `name` is
/// copied.
///
/// Errors:
/// * `error.NotObject` - the header's filetype is not MH_OBJECT; the file
///   cursor is rewound to 0 first.
/// * `error.UnsupportedCpuArchitecture` - cputype is neither ARM64 nor x86_64.
/// * `error.MismatchedCpuArchitecture` - the object's architecture differs from `arch`.
/// * anything propagated from reading the load commands or the symbol and
///   string tables.
///
/// On any error everything allocated here is released again; `file` itself is
/// never closed on the error path.
/// NOTE(review): presumably the caller keeps ownership of `file` on failure - confirm.
pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, name: []const u8, file: fs.File) !Object {
    var reader = file.reader();
    const header = try reader.readStruct(macho.mach_header_64);

    if (header.filetype != macho.MH_OBJECT) {
        // Reset file cursor.
        try file.seekTo(0);
        return error.NotObject;
    }

    const this_arch: std.Target.Cpu.Arch = switch (header.cputype) {
        macho.CPU_TYPE_ARM64 => .aarch64,
        macho.CPU_TYPE_X86_64 => .x86_64,
        else => |value| {
            log.err("unsupported cpu architecture 0x{x}", .{value});
            return error.UnsupportedCpuArchitecture;
        },
    };
    if (this_arch != arch) {
        log.err("mismatched cpu architecture: found {s}, expected {s}", .{ this_arch, arch });
        return error.MismatchedCpuArchitecture;
    }

    var self = Object{
        .allocator = allocator,
        .name = try allocator.dupe(u8, name),
        .file = file,
        .header = header,
    };
    // Release whatever was read so far if a later step fails. This is not
    // `self.deinit()` because that would also close `file`.
    errdefer {
        for (self.load_commands.items) |*lc| {
            lc.deinit(allocator);
        }
        self.load_commands.deinit(allocator);
        self.symtab.deinit(allocator);
        self.strtab.deinit(allocator);
        self.directory.deinit(allocator);
        allocator.free(self.name);
    }

    try self.readLoadCommands(reader, .{});
    try self.readSymtab();
    try self.readStrtab();

    log.debug("\n\n", .{});
    log.debug("{s} defines symbols", .{self.name});
    for (self.symtab.items) |sym| {
        const symname = self.getString(sym.n_strx);
        log.debug("'{s}': {}", .{ symname, sym });
    }

    return self;
}
/// Options for `readLoadCommands`.
pub const ReadOffset = struct {
    /// Amount added to every file offset found in the load commands; `null`
    /// is treated as 0. `Archive.readObject` passes the position of the
    /// object's Mach-O header inside the archive.
    offset: ?u32 = null,
};
/// Reads `self.header.ncmds` load commands from `reader` into
/// `self.load_commands`, recording the index of each recognised command and
/// of the `__TEXT,__text` and `__DWARF` debug sections.
///
/// Every section is also registered in `self.directory` under its
/// (segment name, section name) pair.
///
/// `offset.offset` (0 when null) is added to each file offset found: section
/// `offset`, section `reloff` when non-zero, segment `fileoff`, and the
/// symtab `symoff` / `stroff`. Unrecognised commands are kept and logged.
///
/// On error the commands appended so far stay in `self.load_commands`; the
/// command being processed is freed.
pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !void {
    const offset_mod = offset.offset orelse 0;

    try self.load_commands.ensureCapacity(self.allocator, self.header.ncmds);

    var i: u16 = 0;
    while (i < self.header.ncmds) : (i += 1) {
        var cmd = try LoadCommand.read(self.allocator, reader);
        // `cmd` joins `load_commands` only at the end of the iteration, so
        // free it if registering one of its sections fails first.
        errdefer cmd.deinit(self.allocator);

        switch (cmd.cmd()) {
            macho.LC_SEGMENT_64 => {
                self.segment_cmd_index = i;
                // Take a pointer, not a copy, so the `fileoff` adjustment
                // below reaches the command that gets stored.
                const seg = &cmd.Segment;
                for (seg.sections.items) |*sect, j| {
                    const index = @intCast(u16, j);
                    const segname = parseName(&sect.segname);
                    const sectname = parseName(&sect.sectname);
                    if (mem.eql(u8, segname, "__DWARF")) {
                        if (mem.eql(u8, sectname, "__debug_info")) {
                            self.dwarf_debug_info_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_abbrev")) {
                            self.dwarf_debug_abbrev_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_str")) {
                            self.dwarf_debug_str_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_line")) {
                            self.dwarf_debug_line_index = index;
                        } else if (mem.eql(u8, sectname, "__debug_ranges")) {
                            self.dwarf_debug_ranges_index = index;
                        }
                    } else if (mem.eql(u8, segname, "__TEXT")) {
                        if (mem.eql(u8, sectname, "__text")) {
                            self.text_section_index = index;
                        }
                    }

                    try self.directory.putNoClobber(self.allocator, .{
                        .segname = sect.segname,
                        .sectname = sect.sectname,
                    }, index);

                    sect.offset += offset_mod;
                    // A zero `reloff` is left untouched.
                    if (sect.reloff > 0)
                        sect.reloff += offset_mod;
                }

                seg.inner.fileoff += offset_mod;
            },
            macho.LC_SYMTAB => {
                self.symtab_cmd_index = i;
                cmd.Symtab.symoff += offset_mod;
                cmd.Symtab.stroff += offset_mod;
            },
            macho.LC_DYSYMTAB => {
                self.dysymtab_cmd_index = i;
            },
            macho.LC_BUILD_VERSION => {
                self.build_version_cmd_index = i;
            },
            else => {
                log.info("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
            },
        }
        self.load_commands.appendAssumeCapacity(cmd);
    }
}
/// Loads the symbol table described by the LC_SYMTAB command from `self.file`
/// and appends its entries to `self.symtab`.
///
/// Requires `readLoadCommands` to have found an LC_SYMTAB command
/// (`symtab_cmd_index` is unwrapped unconditionally).
///
/// Returns `error.InputOutput` if the file ends before the whole table could
/// be read, rather than exposing uninitialized entries.
pub fn readSymtab(self: *Object) !void {
    const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;

    // Allocate with the alignment of `nlist_64` so the bytes can be viewed as
    // symbol entries without an `@alignCast` that could fail at runtime.
    const buffer = try self.allocator.alignedAlloc(
        u8,
        @alignOf(macho.nlist_64),
        @sizeOf(macho.nlist_64) * symtab_cmd.nsyms,
    );
    defer self.allocator.free(buffer);

    const amt = try self.file.preadAll(buffer, symtab_cmd.symoff);
    if (amt != buffer.len) return error.InputOutput;

    try self.symtab.appendSlice(self.allocator, mem.bytesAsSlice(macho.nlist_64, buffer));
}
/// Loads the string table described by the LC_SYMTAB command from `self.file`
/// and appends its bytes to `self.strtab`.
///
/// Requires `readLoadCommands` to have found an LC_SYMTAB command
/// (`symtab_cmd_index` is unwrapped unconditionally).
///
/// Returns `error.InputOutput` if the file ends before the whole table could
/// be read, rather than exposing uninitialized bytes to `getString`.
pub fn readStrtab(self: *Object) !void {
    const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;

    const buffer = try self.allocator.alloc(u8, symtab_cmd.strsize);
    defer self.allocator.free(buffer);

    const amt = try self.file.preadAll(buffer, symtab_cmd.stroff);
    if (amt != buffer.len) return error.InputOutput;

    try self.strtab.appendSlice(self.allocator, buffer);
}
/// Returns the NUL-terminated string that starts at byte `str_off` of the
/// string table. The result points into `self.strtab`.
///
/// Asserts that `str_off` lies inside the table.
pub fn getString(self: *const Object, str_off: u32) []const u8 {
    const table = self.strtab.items;
    assert(table.len > str_off);
    const start = @ptrCast([*:0]const u8, table.ptr + str_off);
    return mem.spanZ(start);
}
/// Reads the contents of section `index` of the segment command into a buffer
/// allocated with `allocator`. Caller owns the returned slice.
///
/// Requires `readLoadCommands` to have found an LC_SEGMENT_64 command
/// (`segment_cmd_index` is unwrapped unconditionally).
///
/// The buffer is freed again if the read fails.
/// NOTE(review): a short read is still tolerated, leaving the tail of the
/// buffer undefined; callers may rely on this for sections with no file
/// contents - confirm before tightening.
pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
    const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
    const sect = seg.sections.items[index];

    const buffer = try allocator.alloc(u8, sect.size);
    errdefer allocator.free(buffer);

    _ = try self.file.preadAll(buffer, sect.offset);
    return buffer;
}

2301
src/link/MachO/Zld.zig Normal file

File diff suppressed because it is too large Load Diff

View File

@ -166,6 +166,11 @@ pub const SegmentCommand = struct {
return .{ .inner = inner };
}
// TODO: temporary alias of `addSection`; remove it.
pub fn append(self: *SegmentCommand, alloc: *Allocator, section: macho.section_64) !void {
    try self.addSection(alloc, section);
}
pub fn addSection(self: *SegmentCommand, alloc: *Allocator, section: macho.section_64) !void {
try self.sections.append(alloc, section);
self.inner.cmdsize += @sizeOf(macho.section_64);

159
src/link/MachO/reloc.zig Normal file
View File

@ -0,0 +1,159 @@
const std = @import("std");
const log = std.log.scoped(.reloc);
/// 32-bit instruction words, one packed layout per instruction form. Fields
/// are listed from the least significant bit upwards; fields with a default
/// value are fixed opcode bits.
/// NOTE(review): layouts presumably follow the AArch64 encodings named by the
/// constructor functions below - confirm against the architecture manual.
pub const Arm64 = union(enum) {
    /// Layout built by `b` / `bl`.
    Branch: packed struct {
        disp: u26,
        fixed: u5 = 0b00101,
        link: u1,
    },
    /// Layout built by `br` / `blr`.
    BranchRegister: packed struct {
        _1: u5 = 0b0000_0,
        reg: u5,
        _2: u11 = 0b1111_1000_000,
        link: u1,
        _3: u10 = 0b1101_0110_00,
    },
    /// Layout built by `adr` / `adrp`.
    Address: packed struct {
        reg: u5,
        immhi: u19,
        _1: u5 = 0b10000,
        immlo: u2,
        page: u1,
    },
    /// Layout built by `ldrr`.
    LoadRegister: packed struct {
        rt: u5,
        rn: u5,
        offset: u12,
        _1: u8 = 0b111_0_01_01,
        size: u1,
        _2: u1 = 0b1,
    },
    /// Layout built by `ldr`.
    LoadLiteral: packed struct {
        reg: u5,
        literal: u19,
        _1: u6 = 0b011_0_00,
        size: u1,
        _2: u1 = 0b0,
    },
    /// Layout built by `add`.
    Add: packed struct {
        rt: u5,
        rn: u5,
        offset: u12,
        _1: u9 = 0b0_0_100010_0,
        size: u1,
    },

    /// Reinterprets the active variant's bits as a `u32`.
    pub fn toU32(self: Arm64) u32 {
        const word = switch (self) {
            .Branch => |inst| @bitCast(u32, inst),
            .BranchRegister => |inst| @bitCast(u32, inst),
            .Address => |inst| @bitCast(u32, inst),
            .LoadRegister => |inst| @bitCast(u32, inst),
            .LoadLiteral => |inst| @bitCast(u32, inst),
            .Add => |inst| @bitCast(u32, inst),
        };
        return word;
    }

    /// `Branch` with the link bit clear. `disp` is stored shifted right by two.
    pub fn b(disp: i28) Arm64 {
        return branchImmediate(disp, 0);
    }

    /// `Branch` with the link bit set. `disp` is stored shifted right by two.
    pub fn bl(disp: i28) Arm64 {
        return branchImmediate(disp, 1);
    }

    /// `BranchRegister` on `reg` with the link bit clear.
    pub fn br(reg: u5) Arm64 {
        return branchToRegister(reg, 0);
    }

    /// `BranchRegister` on `reg` with the link bit set.
    pub fn blr(reg: u5) Arm64 {
        return branchToRegister(reg, 1);
    }

    /// `Address` with the page bit clear; `disp` is split into its low two
    /// bits (`immlo`) and the remaining high bits (`immhi`).
    pub fn adr(reg: u5, disp: u21) Arm64 {
        return pcRelative(reg, disp, 0);
    }

    /// `Address` with the page bit set; `disp` is split exactly as in `adr`.
    pub fn adrp(reg: u5, disp: u21) Arm64 {
        return pcRelative(reg, disp, 1);
    }

    /// `LoadLiteral` into `reg` with the given literal field and size bit.
    pub fn ldr(reg: u5, literal: u19, size: u1) Arm64 {
        return Arm64{
            .LoadLiteral = .{
                .reg = reg,
                .literal = literal,
                .size = size,
            },
        };
    }

    /// `Add` with the given registers, 12-bit offset and size bit.
    pub fn add(rt: u5, rn: u5, offset: u12, size: u1) Arm64 {
        return Arm64{
            .Add = .{
                .rt = rt,
                .rn = rn,
                .offset = offset,
                .size = size,
            },
        };
    }

    /// `LoadRegister` with the given registers, 12-bit offset and size bit.
    pub fn ldrr(rt: u5, rn: u5, offset: u12, size: u1) Arm64 {
        return Arm64{
            .LoadRegister = .{
                .rt = rt,
                .rn = rn,
                .offset = offset,
                .size = size,
            },
        };
    }

    /// Returns true when bits 2..4 of the fourth byte of `inst` equal 0b100.
    /// The low five bits of that byte are also written to the debug log.
    pub fn isArithmetic(inst: *const [4]u8) bool {
        const low_five = @truncate(u5, inst[3]);
        log.debug("{b}", .{low_five});
        const group = low_five >> 2;
        return group == 4;
    }

    /// Shared body of `b` and `bl`.
    fn branchImmediate(disp: i28, link: u1) Arm64 {
        const raw = @bitCast(u28, disp);
        return Arm64{
            .Branch = .{
                .disp = @truncate(u26, raw >> 2),
                .link = link,
            },
        };
    }

    /// Shared body of `br` and `blr`.
    fn branchToRegister(reg: u5, link: u1) Arm64 {
        return Arm64{
            .BranchRegister = .{
                .reg = reg,
                .link = link,
            },
        };
    }

    /// Shared body of `adr` and `adrp`.
    fn pcRelative(reg: u5, disp: u21, page: u1) Arm64 {
        return Arm64{
            .Address = .{
                .reg = reg,
                .immhi = @truncate(u19, disp >> 2),
                .immlo = @truncate(u2, disp),
                .page = page,
            },
        };
    }
};

View File

@ -547,6 +547,7 @@ fn buildOutputType(
var image_base_override: ?u64 = null;
var use_llvm: ?bool = null;
var use_lld: ?bool = null;
var use_zld: ?bool = null;
var use_clang: ?bool = null;
var link_eh_frame_hdr = false;
var link_emit_relocs = false;
@ -906,6 +907,8 @@ fn buildOutputType(
use_lld = true;
} else if (mem.eql(u8, arg, "-fno-LLD")) {
use_lld = false;
} else if (mem.eql(u8, arg, "-fZLD")) {
use_zld = true;
} else if (mem.eql(u8, arg, "-fClang")) {
use_clang = true;
} else if (mem.eql(u8, arg, "-fno-Clang")) {
@ -1864,6 +1867,7 @@ fn buildOutputType(
.want_compiler_rt = want_compiler_rt,
.use_llvm = use_llvm,
.use_lld = use_lld,
.use_zld = use_zld,
.use_clang = use_clang,
.rdynamic = rdynamic,
.linker_script = linker_script,