macho: cache string len

This commit is contained in:
Jakub Konka 2024-07-22 09:37:32 +02:00
parent 79fefec599
commit 06a0da3e8a
7 changed files with 93 additions and 73 deletions

View File

@ -4578,6 +4578,11 @@ pub const SymbolResolver = struct {
pub const Index = u32;
};
/// Handle to a NUL-terminated string stored inside a string table buffer.
/// Carrying the length alongside the offset lets readers slice the string
/// directly instead of scanning for the terminator.
pub const String = struct {
/// Byte offset of the string's first character within the string table.
pos: u32 = 0,
/// Length in bytes, counting the trailing NUL byte (the `addString` helpers
/// store `string.len + 1`). The default of 0 is what a `.{}` placeholder
/// carries before a real name is assigned.
len: u32 = 0,
};
const MachO = @This();
const std = @import("std");

View File

@ -2,7 +2,7 @@
value: u64 = 0,
/// Name of this Atom.
name: u32 = 0,
name: MachO.String = .{},
/// Index into linker's input file table.
file: File.Index = 0,
@ -42,7 +42,7 @@ extra: u32 = 0,
pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 {
return switch (self.getFile(macho_file)) {
.dylib => unreachable,
.zig_object => |x| x.strtab.getAssumeExists(self.name),
.zig_object => |x| x.strtab.buffer.items[self.name.pos..][0 .. self.name.len - 1 :0],
inline else => |x| x.getString(self.name),
};
}

View File

@ -610,15 +610,18 @@ pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib {
return macho_file.getFile(self.umbrella).?.dylib;
}
fn addString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 {
fn addString(self: *Dylib, allocator: Allocator, name: []const u8) !MachO.String {
const off = @as(u32, @intCast(self.strtab.items.len));
try self.strtab.writer(allocator).print("{s}\x00", .{name});
return off;
try self.strtab.ensureUnusedCapacity(allocator, name.len + 1);
self.strtab.appendSliceAssumeCapacity(name);
self.strtab.appendAssumeCapacity(0);
return .{ .pos = off, .len = @intCast(name.len + 1) };
}
pub fn getString(self: Dylib, off: u32) [:0]const u8 {
assert(off < self.strtab.items.len);
return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
pub fn getString(self: Dylib, string: MachO.String) [:0]const u8 {
assert(string.pos < self.strtab.items.len and string.pos + string.len <= self.strtab.items.len);
if (string.len == 0) return "";
return self.strtab.items[string.pos..][0 .. string.len - 1 :0];
}
pub fn asFile(self: *Dylib) File {
@ -932,7 +935,7 @@ pub const Id = struct {
};
const Export = struct {
name: u32,
name: MachO.String,
flags: Flags,
const Flags = packed struct {

View File

@ -53,7 +53,7 @@ pub fn init(self: *InternalObject, allocator: Allocator) !void {
pub fn initSymbols(self: *InternalObject, macho_file: *MachO) !void {
const newSymbolAssumeCapacity = struct {
fn newSymbolAssumeCapacity(obj: *InternalObject, name: u32, args: struct {
fn newSymbolAssumeCapacity(obj: *InternalObject, name: MachO.String, args: struct {
type: u8 = macho.N_UNDF | macho.N_EXT,
desc: u16 = 0,
}) Symbol.Index {
@ -69,7 +69,7 @@ pub fn initSymbols(self: *InternalObject, macho_file: *MachO) !void {
const nlist_idx: u32 = @intCast(obj.symtab.items.len);
const nlist = obj.symtab.addOneAssumeCapacity();
nlist.* = .{
.n_strx = name,
.n_strx = name.pos,
.n_type = args.type,
.n_sect = 0,
.n_desc = args.desc,
@ -197,16 +197,16 @@ pub fn resolveBoundarySymbols(self: *InternalObject, macho_file: *MachO) !void {
try self.globals.ensureUnusedCapacity(gpa, nsyms);
for (boundary_symbols.keys(), boundary_symbols.values()) |name, ref| {
const name_off = try self.addString(gpa, name);
const name_str = try self.addString(gpa, name);
const sym_index = self.addSymbolAssumeCapacity();
self.boundary_symbols.appendAssumeCapacity(sym_index);
const sym = &self.symbols.items[sym_index];
sym.name = name_off;
sym.name = name_str;
sym.visibility = .local;
const nlist_idx: u32 = @intCast(self.symtab.items.len);
const nlist = self.symtab.addOneAssumeCapacity();
nlist.* = .{
.n_strx = name_off,
.n_strx = name_str.pos,
.n_type = macho.N_SECT,
.n_sect = 0,
.n_desc = 0,
@ -273,7 +273,7 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil
const nlist_idx: u32 = @intCast(self.symtab.items.len);
const nlist = try self.symtab.addOne(gpa);
nlist.* = .{
.n_strx = name_str,
.n_strx = name_str.pos,
.n_type = macho.N_SECT,
.n_sect = @intCast(n_sect + 1),
.n_desc = 0,
@ -373,15 +373,15 @@ pub fn resolveObjcMsgSendSymbols(self: *InternalObject, macho_file: *MachO) !voi
const name = MachO.eatPrefix(sym_name, "_objc_msgSend$").?;
const selrefs_index = try self.addObjcMsgsendSections(name, macho_file);
const name_off = try self.addString(gpa, sym_name);
const name_str = try self.addString(gpa, sym_name);
const sym_index = try self.addSymbol(gpa);
const sym = &self.symbols.items[sym_index];
sym.name = name_off;
sym.name = name_str;
sym.visibility = .hidden;
const nlist_idx: u32 = @intCast(self.symtab.items.len);
const nlist = try self.symtab.addOne(gpa);
nlist.* = .{
.n_strx = name_off,
.n_strx = name_str.pos,
.n_type = macho.N_SECT | macho.N_EXT | macho.N_PEXT,
.n_sect = 0,
.n_desc = 0,
@ -624,17 +624,18 @@ fn getSectionData(self: *const InternalObject, index: u32) error{Overflow}![]con
@panic("ref to non-existent section");
}
pub fn addString(self: *InternalObject, allocator: Allocator, name: []const u8) !u32 {
pub fn addString(self: *InternalObject, allocator: Allocator, string: []const u8) !MachO.String {
const off: u32 = @intCast(self.strtab.items.len);
try self.strtab.ensureUnusedCapacity(allocator, name.len + 1);
self.strtab.appendSliceAssumeCapacity(name);
try self.strtab.ensureUnusedCapacity(allocator, string.len + 1);
self.strtab.appendSliceAssumeCapacity(string);
self.strtab.appendAssumeCapacity(0);
return off;
return .{ .pos = off, .len = @intCast(string.len + 1) };
}
pub fn getString(self: InternalObject, off: u32) [:0]const u8 {
assert(off < self.strtab.items.len);
return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
pub fn getString(self: InternalObject, string: MachO.String) [:0]const u8 {
assert(string.pos < self.strtab.items.len and string.pos + string.len <= self.strtab.items.len);
if (string.len == 0) return "";
return self.strtab.items[string.pos..][0 .. string.len - 1 :0];
}
pub fn asFile(self: *InternalObject) File {

View File

@ -178,7 +178,7 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
fn rank(ctx: *const Object, nl: macho.nlist_64) u8 {
if (!nl.ext()) {
const name = ctx.getString(nl.n_strx);
const name = ctx.getNStrx(nl.n_strx);
if (name.len == 0) return 5;
if (name[0] == 'l' or name[0] == 'L') return 4;
return 3;
@ -341,7 +341,7 @@ fn initSubsections(self: *Object, allocator: Allocator, nlists: anytype) !void {
else
sect.@"align";
const atom_index = try self.addAtom(allocator, .{
.name = nlist.nlist.n_strx,
.name = .{ .pos = nlist.nlist.n_strx, .len = @intCast(self.getNStrx(nlist.nlist.n_strx).len + 1) },
.n_sect = @intCast(n_sect),
.off = nlist.nlist.n_value - sect.addr,
.size = size,
@ -465,7 +465,7 @@ fn initCstringLiterals(self: *Object, allocator: Allocator, file: File.Handle, m
const nlist_index: u32 = @intCast(try self.symtab.addOne(allocator));
self.symtab.set(nlist_index, .{
.nlist = .{
.n_strx = name_str,
.n_strx = name_str.pos,
.n_type = macho.N_SECT,
.n_sect = @intCast(atom.n_sect + 1),
.n_desc = 0,
@ -532,7 +532,7 @@ fn initFixedSizeLiterals(self: *Object, allocator: Allocator, macho_file: *MachO
const nlist_index: u32 = @intCast(try self.symtab.addOne(allocator));
self.symtab.set(nlist_index, .{
.nlist = .{
.n_strx = name_str,
.n_strx = name_str.pos,
.n_type = macho.N_SECT,
.n_sect = @intCast(atom.n_sect + 1),
.n_desc = 0,
@ -590,7 +590,7 @@ fn initPointerLiterals(self: *Object, allocator: Allocator, macho_file: *MachO)
const nlist_index: u32 = @intCast(try self.symtab.addOne(allocator));
self.symtab.set(nlist_index, .{
.nlist = .{
.n_strx = name_str,
.n_strx = name_str.pos,
.n_type = macho.N_SECT,
.n_sect = @intCast(atom.n_sect + 1),
.n_desc = 0,
@ -796,7 +796,7 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void {
atom.* = atom_index;
} else {
try macho_file.reportParseError2(self.index, "symbol {s} not attached to any (sub)section", .{
self.getString(nlist.n_strx),
self.getNStrx(nlist.n_strx),
});
return error.MalformedObject;
}
@ -821,7 +821,7 @@ fn initSymbols(self: *Object, allocator: Allocator, macho_file: *MachO) !void {
const index = self.addSymbolAssumeCapacity();
const symbol = &self.symbols.items[index];
symbol.value = nlist.n_value;
symbol.name = nlist.n_strx;
symbol.name = .{ .pos = nlist.n_strx, .len = @intCast(self.getNStrx(nlist.n_strx).len + 1) };
symbol.nlist_idx = @intCast(i);
symbol.extra = self.addSymbolExtraAssumeCapacity(.{});
@ -894,7 +894,7 @@ fn initSymbolStabs(self: *Object, allocator: Allocator, nlists: anytype, macho_f
defer addr_lookup.deinit();
for (syms) |sym| {
if (sym.sect() and (sym.ext() or sym.pext())) {
try addr_lookup.putNoClobber(self.getString(sym.n_strx), sym.n_value);
try addr_lookup.putNoClobber(self.getNStrx(sym.n_strx), sym.n_value);
}
}
@ -926,7 +926,7 @@ fn initSymbolStabs(self: *Object, allocator: Allocator, nlists: anytype, macho_f
},
macho.N_GSYM => {
stab.is_func = false;
stab.index = sym_lookup.find(addr_lookup.get(self.getString(nlist.n_strx)).?);
stab.index = sym_lookup.find(addr_lookup.get(self.getNStrx(nlist.n_strx)).?);
},
macho.N_STSYM => {
stab.is_func = false;
@ -1708,7 +1708,7 @@ pub fn updateArSymtab(self: Object, ar_symtab: *Archive.ArSymtab, macho_file: *M
const gpa = macho_file.base.comp.gpa;
for (self.symtab.items(.nlist)) |nlist| {
if (!nlist.ext() or (nlist.undf() and !nlist.tentative())) continue;
const off = try ar_symtab.strtab.insert(gpa, self.getString(nlist.n_strx));
const off = try ar_symtab.strtab.insert(gpa, self.getNStrx(nlist.n_strx));
try ar_symtab.entries.append(gpa, .{ .off = off, .file = self.index });
}
}
@ -2292,17 +2292,23 @@ pub fn getAtomRelocs(self: *const Object, atom: Atom, macho_file: *MachO) []cons
return relocs.items[extra.rel_index..][0..extra.rel_count];
}
fn addString(self: *Object, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 {
fn addString(self: *Object, allocator: Allocator, string: [:0]const u8) error{OutOfMemory}!MachO.String {
const off: u32 = @intCast(self.strtab.items.len);
try self.strtab.ensureUnusedCapacity(allocator, name.len + 1);
self.strtab.appendSliceAssumeCapacity(name);
try self.strtab.ensureUnusedCapacity(allocator, string.len + 1);
self.strtab.appendSliceAssumeCapacity(string);
self.strtab.appendAssumeCapacity(0);
return off;
return .{ .pos = off, .len = @intCast(string.len + 1) };
}
pub fn getString(self: Object, off: u32) [:0]const u8 {
assert(off < self.strtab.items.len);
return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
pub fn getString(self: Object, string: MachO.String) [:0]const u8 {
assert(string.pos < self.strtab.items.len and string.pos + string.len <= self.strtab.items.len);
if (string.len == 0) return "";
return self.strtab.items[string.pos..][0 .. string.len - 1 :0];
}
/// Resolves a raw `n_strx` offset into this object's string table and returns
/// the NUL-terminated string that starts there.
///
/// No length is known for a bare offset, so the terminator is located by
/// scanning from `n_strx`. Asserts that the offset lies inside the table.
fn getNStrx(self: Object, n_strx: u32) [:0]const u8 {
    const table = self.strtab.items;
    assert(n_strx < table.len);
    const start: [*:0]const u8 = @ptrCast(table.ptr + n_strx);
    return mem.sliceTo(start, 0);
}
pub fn hasUnwindRecords(self: Object) bool {
@ -2323,7 +2329,7 @@ fn hasSymbolStabs(self: Object) bool {
fn hasObjC(self: Object) bool {
for (self.symtab.items(.nlist)) |nlist| {
const name = self.getString(nlist.n_strx);
const name = self.getNStrx(nlist.n_strx);
if (mem.startsWith(u8, name, "_OBJC_CLASS_$_")) return true;
}
for (self.sections.items(.header)) |sect| {
@ -2346,7 +2352,7 @@ pub fn asFile(self: *Object) File {
}
const AddAtomArgs = struct {
name: u32,
name: MachO.String,
n_sect: u8,
off: u64,
size: u64,
@ -2690,17 +2696,17 @@ const StabFile = struct {
fn getCompDir(sf: StabFile, object: Object) [:0]const u8 {
const nlist = object.symtab.items(.nlist)[sf.comp_dir];
return object.getString(nlist.n_strx);
return object.getNStrx(nlist.n_strx);
}
fn getTuName(sf: StabFile, object: Object) [:0]const u8 {
const nlist = object.symtab.items(.nlist)[sf.comp_dir + 1];
return object.getString(nlist.n_strx);
return object.getNStrx(nlist.n_strx);
}
fn getOsoPath(sf: StabFile, object: Object) [:0]const u8 {
const nlist = object.symtab.items(.nlist)[sf.comp_dir + 2];
return object.getString(nlist.n_strx);
return object.getNStrx(nlist.n_strx);
}
fn getOsoModTime(sf: StabFile, object: Object) u64 {
@ -2758,8 +2764,8 @@ const StabFile = struct {
};
const CompileUnit = struct {
comp_dir: u32,
tu_name: u32,
comp_dir: MachO.String,
tu_name: MachO.String,
fn getCompDir(cu: CompileUnit, object: Object) [:0]const u8 {
return object.getString(cu.comp_dir);

View File

@ -4,7 +4,7 @@
value: u64 = 0,
/// Offset into the linker's intern table.
name: u32 = 0,
name: MachO.String = .{},
/// File where this symbol is defined.
file: File.Index = 0,
@ -57,7 +57,7 @@ pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool {
pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 {
return switch (symbol.getFile(macho_file).?) {
.zig_object => |x| x.strtab.getAssumeExists(symbol.name),
.zig_object => |x| x.strtab.buffer.items[symbol.name.pos..][0 .. symbol.name.len - 1 :0],
inline else => |x| x.getString(symbol.name),
};
}

View File

@ -141,7 +141,7 @@ pub fn deinit(self: *ZigObject, allocator: Allocator) void {
}
}
fn newSymbol(self: *ZigObject, allocator: Allocator, name: u32, args: struct {
fn newSymbol(self: *ZigObject, allocator: Allocator, name: MachO.String, args: struct {
type: u8 = macho.N_UNDF | macho.N_EXT,
desc: u16 = 0,
}) !Symbol.Index {
@ -158,7 +158,7 @@ fn newSymbol(self: *ZigObject, allocator: Allocator, name: u32, args: struct {
const nlist_idx: u32 = @intCast(self.symtab.addOneAssumeCapacity());
self.symtab.set(nlist_idx, .{
.nlist = .{
.n_strx = name,
.n_strx = name.pos,
.n_type = args.type,
.n_sect = 0,
.n_desc = args.desc,
@ -174,7 +174,7 @@ fn newSymbol(self: *ZigObject, allocator: Allocator, name: u32, args: struct {
return index;
}
fn newAtom(self: *ZigObject, allocator: Allocator, name: u32, macho_file: *MachO) !Atom.Index {
fn newAtom(self: *ZigObject, allocator: Allocator, name: MachO.String, macho_file: *MachO) !Atom.Index {
try self.atoms.ensureUnusedCapacity(allocator, 1);
try self.atoms_extra.ensureUnusedCapacity(allocator, @sizeOf(Atom.Extra));
try self.atoms_indexes.ensureUnusedCapacity(allocator, 1);
@ -192,7 +192,7 @@ fn newAtom(self: *ZigObject, allocator: Allocator, name: u32, macho_file: *MachO
return index;
}
fn newSymbolWithAtom(self: *ZigObject, allocator: Allocator, name: u32, macho_file: *MachO) !Symbol.Index {
fn newSymbolWithAtom(self: *ZigObject, allocator: Allocator, name: MachO.String, macho_file: *MachO) !Symbol.Index {
const atom_index = try self.newAtom(allocator, name, macho_file);
const sym_index = try self.newSymbol(allocator, name, .{ .type = macho.N_SECT });
const sym = &self.symbols.items[sym_index];
@ -992,10 +992,10 @@ fn updateDeclCode(
const sym_name = try std.fmt.allocPrintZ(gpa, "_{s}", .{decl.fqn.toSlice(ip)});
defer gpa.free(sym_name);
sym.name = try self.strtab.insert(gpa, sym_name);
sym.name = try self.addString(gpa, sym_name);
atom.setAlive(true);
atom.name = sym.name;
nlist.n_strx = sym.name;
nlist.n_strx = sym.name.pos;
nlist.n_type = macho.N_SECT;
nlist.n_sect = sect_index + 1;
self.symtab.items(.size)[sym.nlist_idx] = code.len;
@ -1090,9 +1090,9 @@ fn createTlvInitializer(
const gpa = macho_file.base.comp.gpa;
const sym_name = try std.fmt.allocPrint(gpa, "{s}$tlv$init", .{name});
defer gpa.free(sym_name);
const off = try self.strtab.insert(gpa, sym_name);
const string = try self.addString(gpa, sym_name);
const sym_index = try self.newSymbolWithAtom(gpa, off, macho_file);
const sym_index = try self.newSymbolWithAtom(gpa, string, macho_file);
const sym = &self.symbols.items[sym_index];
const nlist = &self.symtab.items(.nlist)[sym.nlist_idx];
const atom = sym.getAtom(macho_file).?;
@ -1142,10 +1142,10 @@ fn createTlvDescriptor(
atom.out_n_sect = sect_index;
sym.value = 0;
sym.name = try self.strtab.insert(gpa, name);
sym.name = try self.addString(gpa, name);
atom.setAlive(true);
atom.name = sym.name;
nlist.n_strx = sym.name;
nlist.n_strx = sym.name.pos;
nlist.n_sect = sect_index + 1;
nlist.n_type = macho.N_SECT;
nlist.n_value = 0;
@ -1296,8 +1296,8 @@ fn lowerConst(
var code_buffer = std.ArrayList(u8).init(gpa);
defer code_buffer.deinit();
const name_str_index = try self.strtab.insert(gpa, name);
const sym_index = try self.newSymbolWithAtom(gpa, name_str_index, macho_file);
const name_str = try self.addString(gpa, name);
const sym_index = try self.newSymbolWithAtom(gpa, name_str, macho_file);
const res = try codegen.generateSymbol(&macho_file.base, pt, src_loc, val, &code_buffer, .{
.none = {},
@ -1447,13 +1447,13 @@ fn updateLazySymbol(
var code_buffer = std.ArrayList(u8).init(gpa);
defer code_buffer.deinit();
const name_str_index = blk: {
const name_str = blk: {
const name = try std.fmt.allocPrint(gpa, "__lazy_{s}_{}", .{
@tagName(lazy_sym.kind),
lazy_sym.ty.fmt(pt),
});
defer gpa.free(name);
break :blk try self.strtab.insert(gpa, name);
break :blk try self.addString(gpa, name);
};
const src = lazy_sym.ty.srcLocOrNull(mod) orelse Module.LazySrcLoc.unneeded;
@ -1480,18 +1480,18 @@ fn updateLazySymbol(
.const_data => macho_file.zig_const_sect_index.?,
};
const sym = &self.symbols.items[symbol_index];
sym.name = name_str_index;
sym.name = name_str;
sym.out_n_sect = output_section_index;
const nlist = &self.symtab.items(.nlist)[sym.nlist_idx];
nlist.n_strx = name_str_index;
nlist.n_strx = name_str.pos;
nlist.n_type = macho.N_SECT;
nlist.n_sect = output_section_index + 1;
self.symtab.items(.size)[sym.nlist_idx] = code.len;
const atom = sym.getAtom(macho_file).?;
atom.setAlive(true);
atom.name = name_str_index;
atom.name = name_str;
atom.alignment = required_alignment;
atom.size = code.len;
atom.out_n_sect = output_section_index;
@ -1553,10 +1553,10 @@ pub fn getGlobalSymbol(self: *ZigObject, macho_file: *MachO, name: []const u8, l
const gpa = macho_file.base.comp.gpa;
const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name});
defer gpa.free(sym_name);
const off = try self.strtab.insert(gpa, sym_name);
const lookup_gop = try self.globals_lookup.getOrPut(gpa, off);
const name_str = try self.addString(gpa, sym_name);
const lookup_gop = try self.globals_lookup.getOrPut(gpa, name_str.pos);
if (!lookup_gop.found_existing) {
const sym_index = try self.newSymbol(gpa, off, .{});
const sym_index = try self.newSymbol(gpa, name_str, .{});
const sym = &self.symbols.items[sym_index];
lookup_gop.value_ptr.* = sym.nlist_idx;
}
@ -1571,7 +1571,7 @@ pub fn getOrCreateMetadataForDecl(
const gpa = macho_file.base.comp.gpa;
const gop = try self.decls.getOrPut(gpa, decl_index);
if (!gop.found_existing) {
const sym_index = try self.newSymbolWithAtom(gpa, 0, macho_file);
const sym_index = try self.newSymbolWithAtom(gpa, .{}, macho_file);
const sym = &self.symbols.items[sym_index];
if (isThreadlocal(macho_file, decl_index)) {
sym.flags.tlv = true;
@ -1609,7 +1609,7 @@ pub fn getOrCreateMetadataForLazySymbol(
};
switch (metadata.state.*) {
.unused => {
const symbol_index = try self.newSymbolWithAtom(gpa, 0, macho_file);
const symbol_index = try self.newSymbolWithAtom(gpa, .{}, macho_file);
const sym = &self.symbols.items[symbol_index];
sym.setSectionFlags(.{ .needs_zig_got = true });
metadata.symbol_index.* = symbol_index;
@ -1762,6 +1762,11 @@ pub fn setSymbolExtra(self: *ZigObject, index: u32, extra: Symbol.Extra) void {
}
}
/// Inserts `string` into this object's string table and returns a handle
/// holding the offset reported by the table together with the string's
/// length including its NUL terminator.
///
/// Propagates any error returned by the string table's `insert`.
fn addString(self: *ZigObject, allocator: Allocator, string: []const u8) !MachO.String {
    const pos = try self.strtab.insert(allocator, string);
    // One extra byte accounts for the terminating NUL.
    const len_with_nul = string.len + 1;
    return .{
        .pos = pos,
        .len = @intCast(len_with_nul),
    };
}
/// Wraps this object in a `File` value by storing the pointer under the
/// `zig_object` member, so it can be handled through the common `File` type.
pub fn asFile(self: *ZigObject) File {
return .{ .zig_object = self };
}