From 177b1b6bf9a7402eb688159dfa94ea5a5ea6f550 Mon Sep 17 00:00:00 2001 From: Tom Maenan Read Cutting Date: Thu, 24 Jun 2021 23:29:39 +0100 Subject: [PATCH] Add fat/universal dylib support to zig ld With this change zig ld can link with dynamic libraries contained within a fat/universal file that had multiple separate binaries embedded within it for multi-arch support (in macOS). Whilst zig can still only create single-architecture executables - the ability to link with fat libraries is useful for cases where they are the easiest (or only) option to link against. --- lib/std/elf.zig | 28 +++------------- lib/std/macho.zig | 27 ++++++++++++++++ lib/std/mem.zig | 28 ++++++++++++++++ src/link/Elf.zig | 24 +++++++------- src/link/MachO/Dylib.zig | 68 ++++++++++++++++++++++++++++++++++----- src/link/MachO/Object.zig | 14 ++++---- 6 files changed, 137 insertions(+), 52 deletions(-) diff --git a/lib/std/elf.zig b/lib/std/elf.zig index b74ea6e837..6c90dff929 100644 --- a/lib/std/elf.zig +++ b/lib/std/elf.zig @@ -429,7 +429,7 @@ pub fn ProgramHeaderIterator(ParseSource: anytype) type { if (self.elf_header.endian == native_endian) return phdr; // Convert fields to native endianness. - bswapAllFields(Elf64_Phdr, &phdr); + mem.bswapAllFields(Elf64_Phdr, &phdr); return phdr; } @@ -441,7 +441,7 @@ pub fn ProgramHeaderIterator(ParseSource: anytype) type { // ELF endianness does NOT match native endianness. if (self.elf_header.endian != native_endian) { // Convert fields to native endianness. - bswapAllFields(Elf32_Phdr, &phdr); + mem.bswapAllFields(Elf32_Phdr, &phdr); } // Convert 32-bit header to 64-bit. @@ -479,7 +479,7 @@ pub fn SectionHeaderIterator(ParseSource: anytype) type { if (self.elf_header.endian == native_endian) return shdr; // Convert fields to native endianness. 
- bswapAllFields(Elf64_Shdr, &shdr); + mem.bswapAllFields(Elf64_Shdr, &shdr); return shdr; } @@ -491,7 +491,7 @@ pub fn SectionHeaderIterator(ParseSource: anytype) type { // ELF endianness does NOT match native endianness. if (self.elf_header.endian != native_endian) { // Convert fields to native endianness. - bswapAllFields(Elf32_Shdr, &shdr); + mem.bswapAllFields(Elf32_Shdr, &shdr); } // Convert 32-bit header to 64-bit. @@ -531,26 +531,6 @@ pub fn int32(need_bswap: bool, int_32: anytype, comptime Int64: anytype) Int64 { } } -pub fn bswapAllFields(comptime S: type, ptr: *S) void { - if (@typeInfo(S) != .Struct) @compileError("bswapAllFields expects a struct as the first argument"); - inline for (std.meta.fields(S)) |f| { - @field(ptr, f.name) = @byteSwap(f.field_type, @field(ptr, f.name)); - } -} -test "bswapAllFields" { - var s: Elf32_Chdr = .{ - .ch_type = 0x12341234, - .ch_size = 0x56785678, - .ch_addralign = 0x12124242, - }; - bswapAllFields(Elf32_Chdr, &s); - try std.testing.expectEqual(Elf32_Chdr{ - .ch_type = 0x34123412, - .ch_size = 0x78567856, - .ch_addralign = 0x42421212, - }, s); -} - pub const EI_NIDENT = 16; pub const EI_CLASS = 4; diff --git a/lib/std/macho.zig b/lib/std/macho.zig index 20620b1fbf..cb030e941e 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -24,6 +24,19 @@ pub const mach_header_64 = extern struct { reserved: u32, }; +pub const fat_header = extern struct { + magic: u32, + nfat_arch: u32, +}; + +pub const fat_arch = extern struct { + cputype: cpu_type_t, + cpusubtype: cpu_subtype_t, + offset: u32, + size: u32, + @"align": u32, +}; + pub const load_command = extern struct { cmd: u32, cmdsize: u32, @@ -1040,6 +1053,20 @@ pub const MH_APP_EXTENSION_SAFE = 0x02000000; /// The external symbols listed in the nlist symbol table do not include all the symbols listed in the dyld info. 
pub const MH_NLIST_OUTOFSYNC_WITH_DYLDINFO = 0x04000000; +// Constants for the magic field of the fat_header + +/// the fat magic number +pub const FAT_MAGIC = 0xcafebabe; + +/// NXSwapLong(FAT_MAGIC) +pub const FAT_CIGAM = 0xbebafeca; + +/// the 64-bit fat magic number +pub const FAT_MAGIC_64 = 0xcafebabf; + +/// NXSwapLong(FAT_MAGIC_64) +pub const FAT_CIGAM_64 = 0xbfbafeca; + /// The flags field of a section structure is separated into two parts a section /// type and section attributes. The section types are mutually exclusive (it /// can only have one type) but the section attributes are not (it may have more diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 2273db34cb..b7b9d92165 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -1539,6 +1539,34 @@ test "writeIntBig and writeIntLittle" { try testing.expect(eql(u8, buf2[0..], &[_]u8{ 0xfc, 0xff })); } +/// Swap the byte order of all the fields of a struct +/// (changing their endianness) +pub fn bswapAllFields(comptime S: type, ptr: *S) void { + if (@typeInfo(S) != .Struct) @compileError("bswapAllFields expects a struct as the first argument"); + inline for (std.meta.fields(S)) |f| { + @field(ptr, f.name) = @byteSwap(f.field_type, @field(ptr, f.name)); + } +} + +test "bswapAllFields" { + const T = extern struct { + f0: u8, + f1: u16, + f2: u32, + }; + var s = T{ + .f0 = 0x12, + .f1 = 0x1234, + .f2 = 0x12345678, + }; + bswapAllFields(T, &s); + try std.testing.expectEqual(T{ + .f0 = 0x12, + .f1 = 0x3412, + .f2 = 0x78563412, + }, s); +} + /// Returns an iterator that iterates over the slices of `buffer` that are not /// any of the bytes in `delimiter_bytes`. 
/// tokenize(" abc def ghi ", " ") diff --git a/src/link/Elf.zig b/src/link/Elf.zig index e2e48b9871..6f839c7694 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1108,7 +1108,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation) !void { for (buf) |*phdr, i| { phdr.* = progHeaderTo32(self.program_headers.items[i]); if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf32_Phdr, phdr); + mem.bswapAllFields(elf.Elf32_Phdr, phdr); } } try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); @@ -1120,7 +1120,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation) !void { for (buf) |*phdr, i| { phdr.* = self.program_headers.items[i]; if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf64_Phdr, phdr); + mem.bswapAllFields(elf.Elf64_Phdr, phdr); } } try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?); @@ -1197,7 +1197,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation) !void { shdr.* = sectHeaderTo32(self.sections.items[i]); log.debug("writing section {}", .{shdr.*}); if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf32_Shdr, shdr); + mem.bswapAllFields(elf.Elf32_Shdr, shdr); } } try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); @@ -1210,7 +1210,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation) !void { shdr.* = self.sections.items[i]; log.debug("writing section {}", .{shdr.*}); if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf64_Shdr, shdr); + mem.bswapAllFields(elf.Elf64_Shdr, shdr); } } try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); @@ -2740,14 +2740,14 @@ fn writeProgHeader(self: *Elf, index: usize) !void { .p32 => { var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])}; if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf32_Phdr, &phdr[0]); + mem.bswapAllFields(elf.Elf32_Phdr, &phdr[0]); } return self.base.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); }, .p64 => { var phdr = 
[1]elf.Elf64_Phdr{self.program_headers.items[index]}; if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf64_Phdr, &phdr[0]); + mem.bswapAllFields(elf.Elf64_Phdr, &phdr[0]); } return self.base.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); }, @@ -2761,7 +2761,7 @@ fn writeSectHeader(self: *Elf, index: usize) !void { var shdr: [1]elf.Elf32_Shdr = undefined; shdr[0] = sectHeaderTo32(self.sections.items[index]); if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf32_Shdr, &shdr[0]); + mem.bswapAllFields(elf.Elf32_Shdr, &shdr[0]); } const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf32_Shdr); return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); @@ -2769,7 +2769,7 @@ fn writeSectHeader(self: *Elf, index: usize) !void { .p64 => { var shdr = [1]elf.Elf64_Shdr{self.sections.items[index]}; if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf64_Shdr, &shdr[0]); + mem.bswapAllFields(elf.Elf64_Shdr, &shdr[0]); } const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf64_Shdr); return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); @@ -2867,7 +2867,7 @@ fn writeSymbol(self: *Elf, index: usize) !void { }, }; if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf32_Sym, &sym[0]); + mem.bswapAllFields(elf.Elf32_Sym, &sym[0]); } const off = syms_sect.sh_offset + @sizeOf(elf.Elf32_Sym) * index; try self.base.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); @@ -2875,7 +2875,7 @@ fn writeSymbol(self: *Elf, index: usize) !void { .p64 => { var sym = [1]elf.Elf64_Sym{self.local_symbols.items[index]}; if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf64_Sym, &sym[0]); + mem.bswapAllFields(elf.Elf64_Sym, &sym[0]); } const off = syms_sect.sh_offset + @sizeOf(elf.Elf64_Sym) * index; try self.base.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off); @@ -2906,7 +2906,7 @@ fn writeAllGlobalSymbols(self: *Elf) !void { .st_shndx = self.global_symbols.items[i].st_shndx, }; if (foreign_endian) { - 
std.elf.bswapAllFields(elf.Elf32_Sym, sym); + mem.bswapAllFields(elf.Elf32_Sym, sym); } } try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); @@ -2925,7 +2925,7 @@ fn writeAllGlobalSymbols(self: *Elf) !void { .st_shndx = self.global_symbols.items[i].st_shndx, }; if (foreign_endian) { - std.elf.bswapAllFields(elf.Elf64_Sym, sym); + mem.bswapAllFields(elf.Elf64_Sym, sym); } } try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), global_syms_off); diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index e91ff30ad2..2ecd2a20ed 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -1,6 +1,7 @@ const Dylib = @This(); const std = @import("std"); +const builtin = std.builtin; const assert = std.debug.assert; const fs = std.fs; const fmt = std.fmt; @@ -8,6 +9,7 @@ const log = std.log.scoped(.dylib); const macho = std.macho; const math = std.math; const mem = std.mem; +const native_endian = builtin.target.cpu.arch.endian(); const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; @@ -26,6 +28,10 @@ syslibroot: ?[]const u8 = null, ordinal: ?u16 = null, +// The actual dylib contents we care about linking with will be embedded at +// an offset within a file if we are linking against a fat lib +library_offset: u64 = 0, + load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, symtab_cmd_index: ?u16 = null, @@ -205,9 +211,45 @@ pub fn closeFile(self: Dylib) void { } } +fn decodeArch(cputype: macho.cpu_type_t) !std.Target.Cpu.Arch { + const arch: Arch = switch (cputype) { + macho.CPU_TYPE_ARM64 => .aarch64, + macho.CPU_TYPE_X86_64 => .x86_64, + else => { + return error.UnsupportedCpuArchitecture; + }, + }; + return arch; +} + pub fn parse(self: *Dylib) !void { log.debug("parsing shared library '{s}'", .{self.name.?}); + self.library_offset = offset: { + const fat_header = try readFatStruct(self.file.?.reader(), macho.fat_header); + if (fat_header.magic != macho.FAT_MAGIC) break :offset 0; + + var fat_arch_index: u32 = 
0; + while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) { + const fat_arch = try readFatStruct(self.file.?.reader(), macho.fat_arch); + // If we come across an architecture that we do not know how to handle, that's + // fine because we can keep looking for one that might match. + const lib_arch = decodeArch(fat_arch.cputype) catch |err| switch (err) { + error.UnsupportedCpuArchitecture => continue, + else => |e| return e, + }; + if (lib_arch == self.arch.?) { + // We have found a matching architecture! + break :offset fat_arch.offset; + } + } else { + log.err("Could not find matching cpu architecture in fat library: expected {s}", .{self.arch.?}); + return error.MismatchedCpuArchitecture; + } + }; + + try self.file.?.seekTo(self.library_offset); + var reader = self.file.?.reader(); self.header = try reader.readStruct(macho.mach_header_64); @@ -216,14 +258,14 @@ pub fn parse(self: *Dylib) !void { return error.NotDylib; } - const this_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) { - macho.CPU_TYPE_ARM64 => .aarch64, - macho.CPU_TYPE_X86_64 => .x86_64, - else => |value| { - log.err("unsupported cpu architecture 0x{x}", .{value}); - return error.UnsupportedCpuArchitecture; + const this_arch: Arch = decodeArch(self.header.?.cputype) catch |err| switch (err) { + error.UnsupportedCpuArchitecture => |e| { + log.err("unsupported cpu architecture 0x{x}", .{self.header.?.cputype}); + return e; }, + else => |e| return e, }; + if (this_arch != self.arch.?) { log.err("mismatched cpu architecture: expected {s}, found {s}", .{ self.arch.?, this_arch }); return error.MismatchedCpuArchitecture; @@ -234,6 +276,16 @@ pub fn parse(self: *Dylib) !void { try self.parseSymbols(); } +fn readFatStruct(reader: anytype, comptime T: type) !T { + // Fat structures (fat_header & fat_arch) are always written and read to/from + // disk in big endian order. 
+ var res: T = try reader.readStruct(T); + if (native_endian != builtin.Endian.Big) { + mem.bswapAllFields(T, &res); + } + return res; +} + fn readLoadCommands(self: *Dylib, reader: anytype) !void { try self.load_commands.ensureCapacity(self.allocator, self.header.?.ncmds); @@ -285,12 +337,12 @@ fn parseSymbols(self: *Dylib) !void { var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms); defer self.allocator.free(symtab); - _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff); + _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff + self.library_offset); const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize); defer self.allocator.free(strtab); - _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff); + _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff + self.library_offset); for (slice) |sym| { const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx)); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 747adaab87..671199670f 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -247,18 +247,14 @@ pub fn parse(self: *Object) !void { try reader.context.seekTo(offset); } - self.header = try reader.readStruct(macho.mach_header_64); - - if (self.header.?.filetype != macho.MH_OBJECT) { - log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ - macho.MH_OBJECT, - self.header.?.filetype, - }); + const header = try reader.readStruct(macho.mach_header_64); + if (header.filetype != macho.MH_OBJECT) { + log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, header.filetype }); return error.NotObject; } - const this_arch: Arch = switch (self.header.?.cputype) { + const this_arch: Arch = switch (header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => |value| { @@ -271,6 +267,8 @@ pub fn parse(self: *Object) 
!void { return error.MismatchedCpuArchitecture; } + self.header = header; + try self.readLoadCommands(reader); try self.parseSymbols(); try self.parseSections();