Merge pull request #7746 from kubkon/macho-extern-fn

macho: extern functions come to MachO!
This commit is contained in:
Andrew Kelley 2021-01-13 15:11:35 -08:00 committed by GitHub
commit ec1541de26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 975 additions and 409 deletions

View File

@ -1859,8 +1859,47 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
},
else => unreachable, // unsupported architecture on MachO
}
} else if (func_value.castTag(.extern_fn)) |_| {
return self.fail(inst.base.src, "TODO implement calling extern functions", .{});
} else if (func_value.castTag(.extern_fn)) |func_payload| {
    // Calling an extern function: register the symbol in the lazy extern symbol
    // table (if it is not there yet), reserve space for the call instruction and
    // record a stub fixup so the call target can be patched in later.
    const decl = func_payload.data;
    // The symbol is stored under the decl name with a leading underscore.
    // Ownership of `decl_name`:
    //   * symbol already present -> this copy is redundant and is freed right away;
    //   * symbol newly inserted  -> the extern symbol table keeps the allocation and
    //     it gets freed when the extern symbol table is cleaned up.
    const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name});
    // A single lookup tells us both whether the symbol exists and what its index is.
    const existing_index = macho_file.extern_lazy_symbols.getIndex(decl_name);
    const already_defined = existing_index != null;
    const symbol: u32 = if (existing_index) |index| blk: {
        self.bin_file.allocator.free(decl_name);
        break :blk @intCast(u32, index);
    } else blk: {
        // Until the table has accepted the entry we still own `decl_name`;
        // release it if the insertion fails so the name is not leaked.
        errdefer self.bin_file.allocator.free(decl_name);
        // The new entry is appended, so its index is the current entry count.
        const index = @intCast(u32, macho_file.extern_lazy_symbols.items().len);
        try macho_file.extern_lazy_symbols.putNoClobber(self.bin_file.allocator, decl_name, .{
            .name = decl_name,
            .dylib_ordinal = 1, // TODO this is now hardcoded, since we only support libSystem.
        });
        break :blk index;
    };
    // Offset in the code buffer where the placeholder call instruction begins.
    const start = self.code.items.len;
    // Emit a placeholder call and remember how many bytes it occupies.
    const len: usize = blk: {
        switch (arch) {
            .x86_64 => {
                // callq: opcode 0xe8 followed by four zero bytes as a placeholder.
                try self.code.ensureCapacity(self.code.items.len + 5);
                self.code.appendSliceAssumeCapacity(&[5]u8{ 0xe8, 0x0, 0x0, 0x0, 0x0 });
                break :blk 5;
            },
            .aarch64 => {
                // bl: a zeroed 4-byte word as a placeholder.
                writeInt(u32, try self.code.addManyAsArray(4), 0);
                break :blk 4;
            },
            else => unreachable, // unsupported architecture on MachO
        }
    };
    try macho_file.stub_fixups.append(self.bin_file.allocator, .{
        .symbol = symbol,
        .already_defined = already_defined,
        .start = start,
        .len = len,
    });
    // We mark the space and fix it up later.
} else {
return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{});
}

File diff suppressed because it is too large Load Diff

View File

@ -39,6 +39,8 @@ load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
pagezero_segment_cmd_index: ?u16 = null,
/// __TEXT segment
text_segment_cmd_index: ?u16 = null,
/// __DATA_CONST segment
data_const_segment_cmd_index: ?u16 = null,
/// __DATA segment
data_segment_cmd_index: ?u16 = null,
/// __LINKEDIT segment
@ -171,6 +173,15 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
self.header_dirty = true;
self.load_commands_dirty = true;
}
// Mirror the base file's __DATA_CONST segment command into this file's load
// commands, unless that has already been done.
if (self.data_const_segment_cmd_index == null) outer: {
if (self.base.data_const_segment_cmd_index == null) break :outer; // __DATA_CONST is optional
// The command is appended below, so its index is the current number of load commands.
self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
const base_cmd = self.base.load_commands.items[self.base.data_const_segment_cmd_index.?].Segment;
const cmd = try self.copySegmentCommand(allocator, base_cmd);
try self.load_commands.append(allocator, .{ .Segment = cmd });
// Flag both the header and the load commands as changed
// (presumably so they are rewritten on the next flush — confirm against the writer).
self.header_dirty = true;
self.load_commands_dirty = true;
}
if (self.data_segment_cmd_index == null) outer: {
if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional
self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len);

View File

@ -6,323 +6,156 @@ const mem = std.mem;
const assert = std.debug.assert;
const Allocator = mem.Allocator;
/// Table of binding info entries used to tell the dyld which
/// symbols to bind at loading time.
pub const BindingInfoTable = struct {
pub const ExternSymbol = struct {
/// Symbol name.
/// We own the memory, therefore we'll need to free it by calling `deinit`.
/// In self-hosted, we don't expect it to be null ever.
/// However, this is for backwards compatibility with LLD when
/// we'll be patching things up post mortem.
name: ?[]u8 = null,
/// Id of the dynamic library where the specified entries can be found.
/// Id of 0 means self.
/// TODO this should really be an id into the table of all defined
/// dylibs.
dylib_ordinal: i64 = 0,
/// Binding type; defaults to pointer type.
binding_type: u8 = macho.BIND_TYPE_POINTER,
/// Id of the segment where this symbol is defined (will have its address
/// resolved).
segment: u16 = 0,
symbols: std.ArrayListUnmanaged(Symbol) = .{},
/// Offset relative to the start address of the `segment`.
offset: u32 = 0,
/// A single entry of the binding info table.
pub const Symbol = struct {
/// Symbol name; `null` when no name has been set.
name: ?[]u8 = null,
/// Id of the segment to bind this symbol to.
segment: u8,
/// Offset of this symbol with respect to the segment id encoded in `segment`.
offset: i64,
/// Addend value (if any).
addend: ?i64 = null,
};
pub fn deinit(self: *BindingInfoTable, allocator: *Allocator) void {
for (self.symbols.items) |*symbol| {
if (symbol.name) |name| {
allocator.free(name);
}
pub fn deinit(self: *ExternSymbol, allocator: *Allocator) void {
if (self.name) |name| {
allocator.free(name);
}
self.symbols.deinit(allocator);
}
};
pub fn rebaseInfoSize(symbols: []*const ExternSymbol) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();
var size: u64 = 0;
for (symbols) |symbol| {
size += 2;
try leb.writeILEB128(writer, symbol.offset);
size += 1;
}
/// Parse the binding info table from byte stream.
pub fn read(self: *BindingInfoTable, reader: anytype, allocator: *Allocator) !void {
var symbol: Symbol = .{
.segment = 0,
.offset = 0,
};
size += 1 + stream.bytes_written;
return size;
}
var dylib_ordinal_set = false;
var done = false;
while (true) {
const inst = reader.readByte() catch |err| switch (err) {
error.EndOfStream => break,
else => return err,
};
const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK;
const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
switch (opcode) {
macho.BIND_OPCODE_DO_BIND => {
try self.symbols.append(allocator, symbol);
symbol = .{
.segment = 0,
.offset = 0,
};
},
macho.BIND_OPCODE_DONE => {
done = true;
break;
},
macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
var name = std.ArrayList(u8).init(allocator);
var next = try reader.readByte();
while (next != @as(u8, 0)) {
try name.append(next);
next = try reader.readByte();
}
symbol.name = name.toOwnedSlice();
},
macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
symbol.segment = imm;
symbol.offset = try leb.readILEB128(i64, reader);
},
macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
assert(!dylib_ordinal_set);
self.dylib_ordinal = imm;
},
macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
assert(!dylib_ordinal_set);
self.dylib_ordinal = try leb.readILEB128(i64, reader);
},
macho.BIND_OPCODE_SET_TYPE_IMM => {
self.binding_type = imm;
},
macho.BIND_OPCODE_SET_ADDEND_SLEB => {
symbol.addend = try leb.readILEB128(i64, reader);
},
else => {
std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode});
},
}
}
assert(done);
/// Emit the rebase opcode stream for `symbols` into `writer`.
/// Each symbol contributes: set-type (pointer), set-segment-and-offset followed by
/// the LEB128-encoded offset, and a do-rebase with an immediate count of one.
/// A single REBASE_OPCODE_DONE closes the stream.
pub fn writeRebaseInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
    // These two opcode bytes do not depend on the symbol, so build them once.
    const set_pointer_type = macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER);
    const rebase_once = macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @truncate(u4, 1);

    for (symbols) |sym| {
        const set_segment = macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, sym.segment);
        try writer.writeByte(set_pointer_type);
        try writer.writeByte(set_segment);
        try leb.writeILEB128(writer, sym.offset);
        try writer.writeByte(rebase_once);
    }

    try writer.writeByte(macho.REBASE_OPCODE_DONE);
}
/// Write the binding info table to byte stream.
pub fn write(self: BindingInfoTable, writer: anytype) !void {
if (self.dylib_ordinal > 15) {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal));
} else if (self.dylib_ordinal > 0) {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal)));
} else {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal)));
pub fn bindInfoSize(symbols: []*const ExternSymbol) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();
var size: u64 = 0;
for (symbols) |symbol| {
size += 1;
if (symbol.dylib_ordinal > 15) {
try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
}
try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, self.binding_type));
size += 1;
for (self.symbols.items) |symbol| {
if (symbol.name) |name| {
try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
try writer.writeAll(name);
try writer.writeByte(0);
}
try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
try leb.writeILEB128(writer, symbol.offset);
if (symbol.addend) |addend| {
try writer.writeByte(macho.BIND_OPCODE_SET_ADDEND_SLEB);
try leb.writeILEB128(writer, addend);
}
try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
}
try writer.writeByte(macho.BIND_OPCODE_DONE);
}
/// Calculate size in bytes of this binding info table.
pub fn calcSize(self: *BindingInfoTable) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();
var size: u64 = 1;
if (self.dylib_ordinal > 15) {
try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal));
if (symbol.name) |name| {
size += 1;
size += name.len;
size += 1;
}
size += 1;
try leb.writeILEB128(writer, symbol.offset);
size += 2;
}
for (self.symbols.items) |symbol| {
if (symbol.name) |name| {
size += 1;
size += name.len;
size += 1;
}
size += stream.bytes_written;
return size;
}
/// Emit the bind opcode stream for `symbols` into `writer`.
/// Per symbol, in order: dylib ordinal, binding type (pointer), the optional
/// NUL-terminated symbol name, segment and LEB128-encoded offset, then
/// BIND_OPCODE_DO_BIND and BIND_OPCODE_DONE.
/// NOTE(review): BIND_OPCODE_DONE is written after every symbol, which matches the
/// `size += 2` accounting in `bindInfoSize`; confirm this is intended when more
/// than one symbol is passed.
pub fn writeBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
    for (symbols) |sym| {
        const ordinal = sym.dylib_ordinal;
        const ordinal_bits = @bitCast(u64, ordinal);

        // Ordinals above 15 do not fit the 4-bit immediate and are ULEB128-encoded;
        // smaller ones are packed into the opcode byte itself.
        if (ordinal > 15) {
            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
            try leb.writeULEB128(writer, ordinal_bits);
        } else {
            const opcode: u8 = if (ordinal > 0)
                macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
            else
                macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM;
            try writer.writeByte(opcode | @truncate(u4, ordinal_bits));
        }

        try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER));

        if (sym.name) |name| {
            try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
            try writer.writeAll(name);
            try writer.writeByte(0);
        }

        try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, sym.segment));
        try leb.writeILEB128(writer, sym.offset);

        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
        try writer.writeByte(macho.BIND_OPCODE_DONE);
    }
}
pub fn lazyBindInfoSize(symbols: []*const ExternSymbol) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();
var size: u64 = 0;
for (symbols) |symbol| {
size += 1;
try leb.writeILEB128(writer, symbol.offset);
size += 1;
if (symbol.dylib_ordinal > 15) {
try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
}
if (symbol.name) |name| {
size += 1;
try leb.writeILEB128(writer, symbol.offset);
if (symbol.addend) |addend| {
size += 1;
try leb.writeILEB128(writer, addend);
}
size += name.len;
size += 1;
}
size += 1 + stream.bytes_written;
return size;
}
};
/// Table of lazy binding info entries used to tell the dyld which
/// symbols to lazily bind at first load of a dylib.
pub const LazyBindingInfoTable = struct {
symbols: std.ArrayListUnmanaged(Symbol) = .{},
/// A single entry of the lazy binding info table.
pub const Symbol = struct {
/// Symbol name; `null` when no name has been set.
name: ?[]u8 = null,
/// Offset of this symbol with respect to the segment id encoded in `segment`.
offset: i64,
/// Id of the dylib where this symbol is expected to reside.
/// Positive ordinals point at dylibs imported with LC_LOAD_DYLIB,
/// 0 means this binary, -1 the main executable, and -2 flat lookup.
dylib_ordinal: i64,
/// Id of the segment to bind this symbol to.
segment: u8,
/// Addend value (if any).
addend: ?i64 = null,
};
pub fn deinit(self: *LazyBindingInfoTable, allocator: *Allocator) void {
for (self.symbols.items) |*symbol| {
if (symbol.name) |name| {
allocator.free(name);
}
}
self.symbols.deinit(allocator);
size += 2;
}
/// Parse the binding info table from byte stream.
pub fn read(self: *LazyBindingInfoTable, reader: anytype, allocator: *Allocator) !void {
var symbol: Symbol = .{
.offset = 0,
.segment = 0,
.dylib_ordinal = 0,
};
size += stream.bytes_written;
return size;
}
var done = false;
while (true) {
const inst = reader.readByte() catch |err| switch (err) {
error.EndOfStream => break,
else => return err,
};
const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK;
const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
pub fn writeLazyBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
for (symbols) |symbol| {
try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
try leb.writeILEB128(writer, symbol.offset);
switch (opcode) {
macho.BIND_OPCODE_DO_BIND => {
try self.symbols.append(allocator, symbol);
},
macho.BIND_OPCODE_DONE => {
done = true;
symbol = .{
.offset = 0,
.segment = 0,
.dylib_ordinal = 0,
};
},
macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
var name = std.ArrayList(u8).init(allocator);
var next = try reader.readByte();
while (next != @as(u8, 0)) {
try name.append(next);
next = try reader.readByte();
}
symbol.name = name.toOwnedSlice();
},
macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
symbol.segment = imm;
symbol.offset = try leb.readILEB128(i64, reader);
},
macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
symbol.dylib_ordinal = imm;
},
macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
symbol.dylib_ordinal = try leb.readILEB128(i64, reader);
},
macho.BIND_OPCODE_SET_ADDEND_SLEB => {
symbol.addend = try leb.readILEB128(i64, reader);
},
else => {
std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode});
},
}
}
assert(done);
}
/// Write the binding info table to byte stream.
pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {
for (self.symbols.items) |symbol| {
try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
try leb.writeILEB128(writer, symbol.offset);
if (symbol.addend) |addend| {
try writer.writeByte(macho.BIND_OPCODE_SET_ADDEND_SLEB);
try leb.writeILEB128(writer, addend);
}
if (symbol.dylib_ordinal > 15) {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
} else if (symbol.dylib_ordinal > 0) {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
} else {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
}
if (symbol.name) |name| {
try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
try writer.writeAll(name);
try writer.writeByte(0);
}
try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
try writer.writeByte(macho.BIND_OPCODE_DONE);
}
}
/// Calculate size in bytes of this binding info table.
pub fn calcSize(self: *LazyBindingInfoTable) !u64 {
var stream = std.io.countingWriter(std.io.null_writer);
var writer = stream.writer();
var size: u64 = 0;
for (self.symbols.items) |symbol| {
size += 1;
try leb.writeILEB128(writer, symbol.offset);
if (symbol.addend) |addend| {
size += 1;
try leb.writeILEB128(writer, addend);
}
size += 1;
if (symbol.dylib_ordinal > 15) {
try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
}
if (symbol.name) |name| {
size += 1;
size += name.len;
size += 1;
}
size += 2;
if (symbol.dylib_ordinal > 15) {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
} else if (symbol.dylib_ordinal > 0) {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
} else {
try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
}
size += stream.bytes_written;
return size;
if (symbol.name) |name| {
try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
try writer.writeAll(name);
try writer.writeByte(0);
}
try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
try writer.writeByte(macho.BIND_OPCODE_DONE);
}
};
}

View File

@ -199,4 +199,22 @@ pub fn addCases(ctx: *TestContext) !void {
"",
);
}
{
// End-to-end case: declares `write` and `exit` as extern "c" functions, calls them
// from `_start`, and expects "Hello, World!\n" as the program output.
var case = ctx.exe("hello world linked to libc", macos_aarch64);
// TODO rewrite this test once we handle more int conversions and return args.
// All arguments are declared as `usize`, so the string pointers are passed via @ptrToInt.
// The second literal is nine bytes long but only eight are passed to `write`,
// so its trailing ',' is never printed.
case.addCompareOutput(
\\extern "c" fn write(usize, usize, usize) void;
\\extern "c" fn exit(usize) noreturn;
\\
\\export fn _start() noreturn {
\\ write(1, @ptrToInt("Hello,"), 6);
\\ write(1, @ptrToInt(" World!\n,"), 8);
\\ exit(0);
\\}
,
"Hello, World!\n",
);
}
}

View File

@ -1495,4 +1495,22 @@ pub fn addCases(ctx: *TestContext) !void {
\\}
, &[_][]const u8{":8:10: error: evaluation exceeded 1000 backwards branches"});
}
{
// End-to-end case: declares `write` and `exit` as extern "c" functions, calls them
// from `_start`, and expects "Hello, World!\n" as the program output.
var case = ctx.exe("hello world linked to libc", macosx_x64);
// TODO rewrite this test once we handle more int conversions and return args.
// All arguments are declared as `usize`, so the string pointers are passed via @ptrToInt.
// The second literal is nine bytes long but only eight are passed to `write`,
// so its trailing ',' is never printed.
case.addCompareOutput(
\\extern "c" fn write(usize, usize, usize) void;
\\extern "c" fn exit(usize) noreturn;
\\
\\export fn _start() noreturn {
\\ write(1, @ptrToInt("Hello,"), 6);
\\ write(1, @ptrToInt(" World!\n,"), 8);
\\ exit(0);
\\}
,
"Hello, World!\n",
);
}
}