zld: store a single global symtab

This commit is contained in:
Jakub Konka 2021-04-02 07:45:11 +02:00
parent 1b5bceec91
commit d5c2f8ed32
3 changed files with 125 additions and 110 deletions

View File

@ -237,7 +237,23 @@ pub fn parseSymtab(self: *Object) !void {
error.EndOfStream => break,
else => |e| return e,
};
const tag: Symbol.Tag = tag: {
if (Symbol.isLocal(symbol)) {
if (Symbol.isStab(symbol))
break :tag .Stab
else
break :tag .Local;
} else if (Symbol.isGlobal(symbol)) {
if (Symbol.isWeakDef(symbol))
break :tag .Weak
else
break :tag .Strong;
} else {
break :tag .Undef;
}
};
self.symtab.appendAssumeCapacity(.{
.tag = tag,
.inner = symbol,
});
}

View File

@ -3,6 +3,17 @@ const Symbol = @This();
const std = @import("std");
const macho = std.macho;
/// Classification of a symbol. It is assigned when an object's symbol table
/// is parsed and rewritten as symbols are resolved.
pub const Tag = enum {
/// Local symbol that is a stab entry.
Stab,
/// Local symbol that is not a stab entry.
Local,
/// Global (defined and extern) symbol that is a weak definition.
Weak,
/// Global (defined and extern) symbol that is not a weak definition.
Strong,
/// Symbol that was still undefined when the dynamic-library pass of symbol
/// resolution ran and was rewritten as an import there.
Import,
/// Symbol that is neither local nor global.
Undef,
};
/// Classification of this symbol.
tag: Tag,
/// MachO representation of this symbol.
inner: macho.nlist_64,
@ -13,43 +24,43 @@ file: ?u16 = null,
/// Index of this symbol within the file's symbol table.
index: ?u32 = null,
/// Returns true when `n_type` masked with `macho.N_STAB` is nonzero.
pub fn isStab(self: Symbol) bool {
return (macho.N_STAB & self.inner.n_type) != 0;
pub fn isStab(sym: macho.nlist_64) bool {
return (macho.N_STAB & sym.n_type) != 0;
}
/// Returns true when `n_type` masked with `macho.N_PEXT` is nonzero.
pub fn isPext(self: Symbol) bool {
return (macho.N_PEXT & self.inner.n_type) != 0;
pub fn isPext(sym: macho.nlist_64) bool {
return (macho.N_PEXT & sym.n_type) != 0;
}
/// Returns true when `n_type` masked with `macho.N_EXT` is nonzero.
pub fn isExt(self: Symbol) bool {
return (macho.N_EXT & self.inner.n_type) != 0;
pub fn isExt(sym: macho.nlist_64) bool {
return (macho.N_EXT & sym.n_type) != 0;
}
/// Returns true when the `macho.N_TYPE` portion of `n_type` equals
/// `macho.N_SECT`.
pub fn isSect(self: Symbol) bool {
const type_ = macho.N_TYPE & self.inner.n_type;
pub fn isSect(sym: macho.nlist_64) bool {
const type_ = macho.N_TYPE & sym.n_type;
return type_ == macho.N_SECT;
}
/// Returns true when the `macho.N_TYPE` portion of `n_type` equals
/// `macho.N_UNDF`. Unlike `isUndef`, this does not look at the extern bit.
pub fn isUndf(self: Symbol) bool {
const type_ = macho.N_TYPE & self.inner.n_type;
pub fn isUndf(sym: macho.nlist_64) bool {
const type_ = macho.N_TYPE & sym.n_type;
return type_ == macho.N_UNDF;
}
// NOTE(review): this is an equality test, unlike the mask tests applied to
// `n_type` by `isStab`/`isPext`/`isExt`. A symbol whose `n_desc` carries any
// other bits besides `N_WEAK_DEF` is therefore not reported as weak; confirm
// whether `(n_desc & macho.N_WEAK_DEF) != 0` was intended.
/// Returns true when the symbol's `n_desc` is exactly `macho.N_WEAK_DEF`.
pub fn isWeakDef(self: Symbol) bool {
return self.inner.n_desc == macho.N_WEAK_DEF;
pub fn isWeakDef(sym: macho.nlist_64) bool {
return sym.n_desc == macho.N_WEAK_DEF;
}
/// Symbol is local if it is either a stab or it is defined and not an extern.
/// "Defined" is tested with `isSect` and "extern" with `isExt`.
pub fn isLocal(self: Symbol) bool {
return self.isStab() or (self.isSect() and !self.isExt());
pub fn isLocal(sym: macho.nlist_64) bool {
return isStab(sym) or (isSect(sym) and !isExt(sym));
}
/// Symbol is global if it is defined and an extern.
/// "Defined" is tested with `isSect` and "extern" with `isExt`.
pub fn isGlobal(self: Symbol) bool {
return self.isSect() and self.isExt();
pub fn isGlobal(sym: macho.nlist_64) bool {
return isSect(sym) and isExt(sym);
}
/// Symbol is undefined if it is not defined and an extern.
/// "Not defined" is tested with `isUndf` (the `N_TYPE` portion of `n_type`
/// equals `N_UNDF`) and "extern" with `isExt`.
pub fn isUndef(self: Symbol) bool {
return self.isUndf() and self.isExt();
pub fn isUndef(sym: macho.nlist_64) bool {
return isUndf(sym) and isExt(sym);
}

View File

@ -73,9 +73,7 @@ la_symbol_ptr_section_index: ?u16 = null,
data_section_index: ?u16 = null,
bss_section_index: ?u16 = null,
globals: std.StringArrayHashMapUnmanaged(Symbol) = .{},
undefs: std.StringArrayHashMapUnmanaged(Symbol) = .{},
externs: std.StringArrayHashMapUnmanaged(Symbol) = .{},
symtab: std.StringArrayHashMapUnmanaged(Symbol) = .{},
strtab: std.ArrayListUnmanaged(u8) = .{},
threadlocal_offsets: std.ArrayListUnmanaged(u64) = .{},
@ -210,20 +208,11 @@ pub fn deinit(self: *Zld) void {
self.mappings.deinit(self.allocator);
self.unhandled_sections.deinit(self.allocator);
for (self.globals.items()) |*entry| {
for (self.symtab.items()) |*entry| {
self.allocator.free(entry.key);
}
self.globals.deinit(self.allocator);
for (self.undefs.items()) |*entry| {
self.allocator.free(entry.key);
}
self.undefs.deinit(self.allocator);
for (self.externs.items()) |*entry| {
self.allocator.free(entry.key);
}
self.externs.deinit(self.allocator);
self.symtab.deinit(self.allocator);
self.strtab.deinit(self.allocator);
}
pub fn closeFiles(self: Zld) void {
@ -276,10 +265,11 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
try self.resolveSymbols();
try self.updateMetadata();
try self.sortSections();
try self.allocateTextSegment();
try self.allocateDataConstSegment();
try self.allocateDataSegment();
self.allocateLinkeditSegment();
self.printSymtab();
// try self.allocateTextSegment();
// try self.allocateDataConstSegment();
// try self.allocateDataSegment();
// self.allocateLinkeditSegment();
// try self.writeStubHelperCommon();
// try self.doRelocs();
// try self.flush();
@ -1216,48 +1206,64 @@ fn resolveSymbolsInObject(self: *Zld, object_id: u16) !void {
log.warn("resolving symbols in '{s}'", .{object.name});
for (object.symtab.items) |sym, sym_id| {
if (sym.isLocal()) continue; // If symbol is local to CU, we don't put it in the global symbol table.
switch (sym.tag) {
.Local, .Stab => continue, // If symbol is local to CU, we don't put it in the global symbol table.
.Weak, .Strong => {
const sym_name = object.getString(sym.inner.n_strx);
const global = self.symtab.getEntry(sym_name) orelse {
// Put new global symbol into the symbol table.
const name = try self.allocator.dupe(u8, sym_name);
try self.symtab.putNoClobber(self.allocator, name, .{
.tag = sym.tag,
.inner = .{
.n_strx = 0, // This will be populated later.
.n_value = 0, // This will be populated later,
.n_type = macho.N_SECT | macho.N_EXT,
.n_desc = 0,
.n_sect = 0, // This will be populated later.
},
.file = object_id,
.index = @intCast(u32, sym_id),
});
continue;
};
const sym_name = object.getString(sym.inner.n_strx);
if (sym.isGlobal()) {
const global = self.globals.getEntry(sym_name) orelse {
const name = try self.allocator.dupe(u8, sym_name);
try self.globals.putNoClobber(self.allocator, name, .{
.inner = sym.inner,
.file = object_id,
.index = @intCast(u32, sym_id),
});
if (self.undefs.swapRemove(sym_name)) |undef| {
self.allocator.free(undef.key);
if (sym.tag == .Weak) continue; // If symbol is weak, nothing to do.
if (global.value.tag == .Strong) { // If both symbols are strong, we have a collision.
log.err("symbol '{s}' defined multiple times", .{sym_name});
return error.MultipleSymbolDefinitions;
}
continue;
};
global.value = .{
.tag = .Strong,
.inner = .{
.n_strx = 0, // This will be populated later.
.n_value = 0, // This will be populated later,
.n_type = macho.N_SECT | macho.N_EXT,
.n_desc = 0,
.n_sect = 0, // This will be populated later.
},
.file = object_id,
.index = @intCast(u32, sym_id),
};
},
.Undef => {
const sym_name = object.getString(sym.inner.n_strx);
if (self.symtab.contains(sym_name)) continue; // Nothing to do if we already found a definition.
if (sym.isWeakDef()) continue; // If symbol is weak, nothing to do.
if (!global.value.isWeakDef()) { // If both symbols are strong, we have a collision.
log.err("symbol '{s}' defined multiple times", .{sym_name});
return error.MultipleSymbolDefinitions;
}
global.value = .{
.inner = sym.inner,
.file = object_id,
.index = @intCast(u32, sym_id),
};
} else if (sym.isUndef()) {
if (self.globals.contains(sym_name)) continue; // Nothing to do if we already found a definition.
if (self.undefs.contains(sym_name)) continue; // No need to reinsert the undef ref.
const name = try self.allocator.dupe(u8, sym_name);
try self.undefs.putNoClobber(self.allocator, name, .{
.inner = sym.inner,
});
} else {
// Oh no, unhandled symbol type, report back to the user.
log.err("unhandled symbol type for symbol {any}", .{sym});
return error.UnhandledSymbolType;
const name = try self.allocator.dupe(u8, sym_name);
try self.symtab.putNoClobber(self.allocator, name, .{
.tag = .Undef,
.inner = .{
.n_strx = 0,
.n_value = 0,
.n_type = 0,
.n_desc = 0,
.n_sect = 0,
},
});
},
.Import => unreachable, // We don't expect any imports just yet.
}
}
}
@ -1274,7 +1280,9 @@ fn resolveSymbols(self: *Zld) !void {
var archive = &self.archives.items[next];
var hit: bool = false;
for (self.undefs.items()) |entry| {
for (self.symtab.items()) |entry| {
if (entry.value.tag != .Undef) continue;
const sym_name = entry.key;
// Check if the entry exists in a static archive.
@ -1306,9 +1314,10 @@ fn resolveSymbols(self: *Zld) !void {
// Third pass, resolve symbols in dynamic libraries.
// TODO Implement libSystem as a hard-coded library, or ship with
// a libSystem.B.tbd definition file?
while (self.undefs.items().len > 0) {
const entry = self.undefs.pop();
try self.externs.putNoClobber(self.allocator, entry.key, .{
for (self.symtab.items()) |*entry| {
if (entry.value.tag != .Undef) continue;
entry.value = .{
.tag = .Import,
.inner = .{
.n_strx = 0, // This will be populated once we write the string table.
.n_type = macho.N_UNDF | macho.N_EXT,
@ -1317,30 +1326,19 @@ fn resolveSymbols(self: *Zld) !void {
.n_value = 0,
},
.file = 0,
});
};
}
// If there are any undefs left, flag an error.
if (self.undefs.items().len > 0) {
for (self.undefs.items()) |entry| {
log.err("undefined reference to symbol '{s}'", .{entry.key});
}
var has_unresolved = false;
for (self.symtab.items()) |entry| {
if (entry.value.tag != .Undef) continue;
has_unresolved = true;
log.err("undefined reference to symbol '{s}'", .{entry.key});
}
if (has_unresolved) {
return error.UndefinedSymbolReference;
}
// Finally, put in a reference to 'dyld_stub_binder'.
const name = try self.allocator.dupe(u8, "dyld_stub_binder");
try self.externs.putNoClobber(self.allocator, name, .{
.inner = .{
.n_strx = 0, // This will be populated once we write the string table.
.n_type = std.macho.N_UNDF | std.macho.N_EXT,
.n_sect = 0,
.n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
.n_value = 0,
},
.file = 0,
});
}
fn doRelocs(self: *Zld) !void {
@ -3261,18 +3259,8 @@ fn aarch64IsArithmetic(inst: *const [4]u8) callconv(.Inline) bool {
}
/// Logs a heading line followed by the key and value of every symbol table
/// entry, all through `log.warn`.
fn printSymtab(self: Zld) void {
log.warn("globals", .{});
for (self.globals.items()) |entry| {
log.warn(" | {s} => {any}", .{ entry.key, entry.value });
}
log.warn("externs", .{});
for (self.externs.items()) |entry| {
log.warn(" | {s} => {any}", .{ entry.key, entry.value });
}
log.warn("undefs", .{});
for (self.undefs.items()) |entry| {
log.warn("symtab", .{});
for (self.symtab.items()) |entry| {
log.warn(" | {s} => {any}", .{ entry.key, entry.value });
}
}