diff --git a/CMakeLists.txt b/CMakeLists.txt index b08dca6f04..700cc20cae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -595,6 +595,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/Elf/ZigModule.zig" "${CMAKE_SOURCE_DIR}/src/link/Elf/eh_frame.zig" "${CMAKE_SOURCE_DIR}/src/link/Elf/file.zig" + "${CMAKE_SOURCE_DIR}/src/link/Elf/gc.zig" "${CMAKE_SOURCE_DIR}/src/link/Elf/synthetic_sections.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig" diff --git a/src/link/Elf.zig b/src/link/Elf.zig index f98465bd52..804be920f7 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -53,7 +53,7 @@ phdr_load_tls_data_index: ?u16 = null, /// The index into the program headers of a PT_LOAD program header with TLS zerofill data. phdr_load_tls_zerofill_index: ?u16 = null, -entry_addr: ?u64 = null, +entry_index: ?Symbol.Index = null, page_size: u32, default_sym_version: elf.Elf64_Versym, @@ -665,7 +665,6 @@ pub fn populateMissingMetadata(self: *Elf) !void { .alignment = self.page_size, .flags = elf.PF_X | elf.PF_R | elf.PF_W, }); - self.entry_addr = null; } if (self.phdr_got_index == null) { @@ -1283,9 +1282,29 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node // Any qualifing unresolved symbol will be upgraded to an absolute, weak // symbol for potential resolution at load-time. self.resolveSymbols(); - try self.addLinkerDefinedSymbols(); self.markEhFrameAtomsDead(); self.markImportsExports(); + + // Look for entry address in objects if not set by the incremental compiler. + if (self.entry_index == null) { + const entry: ?[]const u8 = entry: { + if (self.base.options.entry) |entry| break :entry entry; + if (!self.isDynLib()) break :entry "_start"; + break :entry null; + }; + self.entry_index = if (entry) |name| self.globalByName(name) else null; + } + + const gc_sections = self.base.options.gc_sections orelse false; + if (gc_sections) { + try gc.gcAtoms(self); + + // if (self.base.options.print_gc_sections) { + // try gc.dumpPrunedAtoms(self); + // } + } + + try self.addLinkerDefinedSymbols(); self.claimUnresolved(); // Scan and create missing synthetic entries such as GOT indirection. @@ -1310,19 +1329,6 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node state_log.debug("{}", .{self.dumpState()}); } - // Look for entry address in objects if not set by the incremental compiler. - if (self.entry_addr == null) { - const entry: ?[]const u8 = entry: { - if (self.base.options.entry) |entry| break :entry entry; - if (!self.isDynLib()) break :entry "_start"; - break :entry null; - }; - self.entry_addr = if (entry) |name| entry_addr: { - const global_index = self.globalByName(name) orelse break :entry_addr null; - break :entry_addr self.symbol(global_index).value; - } else null; - } - // Beyond this point, everything has been allocated a virtual address and we can resolve // the relocations, and commit objects to file. if (self.zig_module_index) |index| { @@ -1538,7 +1544,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try self.writeAtoms(); try self.writeSyntheticSections(); - if (self.entry_addr == null and self.base.options.effectiveOutputMode() == .Exe) { + if (self.entry_index == null and self.base.options.effectiveOutputMode() == .Exe) { log.debug("flushing. no_entry_point_found = true", .{}); self.error_flags.no_entry_point_found = true; } else { @@ -2580,7 +2586,7 @@ fn writeHeader(self: *Elf) !void { mem.writeInt(u32, hdr_buf[index..][0..4], 1, endian); index += 4; - const e_entry = if (elf_type == .REL) 0 else self.entry_addr.?; + const e_entry = if (self.entry_index) |entry_index| self.symbol(entry_index).value else 0; // TODO const phdr_table_offset = if (self.phdr_table_index) |ind| @@ -3199,13 +3205,7 @@ pub fn updateDeclExports( } const stb_bits: u8 = switch (exp.opts.linkage) { .Internal => elf.STB_LOCAL, - .Strong => blk: { - const entry_name = self.base.options.entry orelse "_start"; - if (mem.eql(u8, exp_name, entry_name)) { - self.entry_addr = decl_sym.value; - } - break :blk elf.STB_GLOBAL; - }, + .Strong => elf.STB_GLOBAL, .Weak => elf.STB_WEAK, .LinkOnce => { try mod.failed_exports.ensureUnusedCapacity(mod.gpa, 1); @@ -4693,6 +4693,16 @@ fn calcNumIRelativeRelocs(self: *Elf) usize { return count; } +pub fn isCIdentifier(name: []const u8) bool { + if (name.len == 0) return false; + const first_c = name[0]; + if (!std.ascii.isAlphabetic(first_c) and first_c != '_') return false; + for (name[1..]) |c| { + if (!std.ascii.isAlphanumeric(c) and c != '_') return false; + } + return true; +} + pub fn atom(self: *Elf, atom_index: Atom.Index) ?*Atom { if (atom_index == 0) return null; assert(atom_index < self.atoms.items.len); @@ -5202,6 +5212,7 @@ const mem = std.mem; const codegen = @import("../codegen.zig"); const eh_frame = @import("Elf/eh_frame.zig"); +const gc = @import("Elf/gc.zig"); const glibc = @import("../glibc.zig"); const link = @import("../link.zig"); const lldMain = @import("../main.zig").lldMain; diff --git a/src/link/Elf/gc.zig b/src/link/Elf/gc.zig new file mode 100644 index 0000000000..26489d3d1a --- /dev/null +++ b/src/link/Elf/gc.zig @@ -0,0 +1,161 @@ +pub fn gcAtoms(elf_file: *Elf) !void { + var roots = std.ArrayList(*Atom).init(elf_file.base.allocator); + defer roots.deinit(); + try collectRoots(&roots, elf_file); + mark(roots, elf_file); + prune(elf_file); +} + +fn collectRoots(roots: *std.ArrayList(*Atom), elf_file: *Elf) !void { + if (elf_file.entry_index) |index| { + const global = elf_file.symbol(index); + try markSymbol(global, roots, elf_file); + } + + for (elf_file.objects.items) |index| { + for (elf_file.file(index).?.object.globals()) |global_index| { + const global = elf_file.symbol(global_index); + if (global.file(elf_file)) |file| { + if (file.index() == index and global.flags.@"export") + try markSymbol(global, roots, elf_file); + } + } + } + + for (elf_file.objects.items) |index| { + const object = elf_file.file(index).?.object; + + for (object.atoms.items) |atom_index| { + const atom = elf_file.atom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + + const shdr = atom.inputShdr(elf_file); + const name = atom.name(elf_file); + const is_gc_root = blk: { + if (shdr.sh_flags & elf.SHF_GNU_RETAIN != 0) break :blk true; + if (shdr.sh_type == elf.SHT_NOTE) break :blk true; + if (shdr.sh_type == elf.SHT_PREINIT_ARRAY) break :blk true; + if (shdr.sh_type == elf.SHT_INIT_ARRAY) break :blk true; + if (shdr.sh_type == elf.SHT_FINI_ARRAY) break :blk true; + if (mem.startsWith(u8, name, ".ctors")) break :blk true; + if (mem.startsWith(u8, name, ".dtors")) break :blk true; + if (mem.startsWith(u8, name, ".init")) break :blk true; + if (mem.startsWith(u8, name, ".fini")) break :blk true; + if (Elf.isCIdentifier(name)) break :blk true; + break :blk false; + }; + if (is_gc_root and markAtom(atom)) try roots.append(atom); + if (shdr.sh_flags & elf.SHF_ALLOC == 0) atom.flags.visited = true; + } + + // Mark every atom referenced by CIE as alive. + for (object.cies.items) |cie| { + for (cie.relocs(elf_file)) |rel| { + const sym = elf_file.symbol(object.symbols.items[rel.r_sym()]); + try markSymbol(sym, roots, elf_file); + } + } + } +} + +fn markSymbol(sym: *Symbol, roots: *std.ArrayList(*Atom), elf_file: *Elf) !void { + const atom = sym.atom(elf_file) orelse return; + if (markAtom(atom)) try roots.append(atom); +} + +fn markAtom(atom: *Atom) bool { + const already_visited = atom.flags.visited; + atom.flags.visited = true; + return atom.flags.alive and !already_visited; +} + +fn markLive(atom: *Atom, elf_file: *Elf) void { + if (@import("build_options").enable_logging) track_live_level.incr(); + + assert(atom.flags.visited); + const object = atom.file(elf_file).?.object; + + for (atom.fdes(elf_file)) |fde| { + for (fde.relocs(elf_file)[1..]) |rel| { + const target_sym = elf_file.symbol(object.symbols.items[rel.r_sym()]); + const target_atom = target_sym.atom(elf_file) orelse continue; + target_atom.flags.alive = true; + gc_track_live_log.debug("{}marking live atom({d})", .{ track_live_level, target_atom.atom_index }); + if (markAtom(target_atom)) markLive(target_atom, elf_file); + } + } + + for (atom.relocs(elf_file)) |rel| { + const target_sym = elf_file.symbol(object.symbols.items[rel.r_sym()]); + const target_atom = target_sym.atom(elf_file) orelse continue; + target_atom.flags.alive = true; + gc_track_live_log.debug("{}marking live atom({d})", .{ track_live_level, target_atom.atom_index }); + if (markAtom(target_atom)) markLive(target_atom, elf_file); + } +} + +fn mark(roots: std.ArrayList(*Atom), elf_file: *Elf) void { + for (roots.items) |root| { + gc_track_live_log.debug("root atom({d})", .{root.atom_index}); + markLive(root, elf_file); + } +} + +fn prune(elf_file: *Elf) void { + for (elf_file.objects.items) |index| { + for (elf_file.file(index).?.object.atoms.items) |atom_index| { + const atom = elf_file.atom(atom_index) orelse continue; + if (atom.flags.alive and !atom.flags.visited) { + atom.flags.alive = false; + atom.markFdesDead(elf_file); + } + } + } +} + +pub fn dumpPrunedAtoms(elf_file: *Elf) !void { + const stderr = std.io.getStdErr().writer(); + for (elf_file.objects.items) |index| { + for (elf_file.file(index).?.object.atoms.items) |atom_index| { + const atom = elf_file.atom(atom_index) orelse continue; + if (!atom.flags.alive) + // TODO should we simply print to stderr? + try stderr.print("link: removing unused section '{s}' in file '{}'\n", .{ + atom.name(elf_file), + atom.file(elf_file).?.fmtPath(), + }); + } + } +} + +const Level = struct { + value: usize = 0, + + fn incr(self: *@This()) void { + self.value += 1; + } + + pub fn format( + self: *const @This(), + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeByteNTimes(' ', self.value); + } +}; + +var track_live_level: Level = .{}; + +const std = @import("std"); +const assert = std.debug.assert; +const elf = std.elf; +const gc_track_live_log = std.log.scoped(.gc_track_live); +const mem = std.mem; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const Elf = @import("../Elf.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/test/link/elf.zig b/test/link/elf.zig index 8b0db9deba..694792bdcf 100644 --- a/test/link/elf.zig +++ b/test/link/elf.zig @@ -60,6 +60,7 @@ fn testGcSections(b: *Build, opts: Options) *Step { \\} ); obj.link_function_sections = true; + obj.link_data_sections = true; obj.is_linking_libc = true; obj.is_linking_libcpp = true;