From 7a9eba2f8597a94e4a6def62253e9bf5220a46af Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Mon, 11 Sep 2023 10:52:30 +0200 Subject: [PATCH] elf: emit relocation to an extern function --- src/arch/x86_64/CodeGen.zig | 22 ++++- src/arch/x86_64/Emit.zig | 10 +- src/link.zig | 2 +- src/link/Elf.zig | 33 ++++++- src/link/Elf/Atom.zig | 190 ++++++++++++++++++++++++++++++++---- src/link/Elf/Relocation.zig | 8 -- src/link/Elf/ZigModule.zig | 29 +++++- 7 files changed, 251 insertions(+), 43 deletions(-) delete mode 100644 src/link/Elf/Relocation.zig diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 07bc4c9200..bd63231890 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -125,7 +125,9 @@ const Owner = union(enum) { .func_index => |func_index| { const mod = ctx.bin_file.options.module.?; const decl_index = mod.funcOwnerDeclIndex(func_index); - if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { + if (ctx.bin_file.cast(link.File.Elf)) |elf_file| { + return elf_file.getOrCreateMetadataForDecl(decl_index); + } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { const atom = try macho_file.getOrCreateAtomForDecl(decl_index); return macho_file.getAtom(atom).getSymbolIndex().?; } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { @@ -136,7 +138,10 @@ const Owner = union(enum) { } else unreachable; }, .lazy_sym => |lazy_sym| { - if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { + if (ctx.bin_file.cast(link.File.Elf)) |elf_file| { + return elf_file.getOrCreateMetadataForLazySymbol(lazy_sym) catch |err| + ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); + } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); return macho_file.getAtom(atom).getSymbolIndex().?; @@ -8178,7 +8183,18 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: 
std.builtin.CallModifier } else if (func_value.getExternFunc(mod)) |extern_func| { const decl_name = mod.intern_pool.stringToSlice(mod.declPtr(extern_func.decl).name); const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name); - if (self.bin_file.cast(link.File.Coff)) |coff_file| { + if (self.bin_file.cast(link.File.Elf)) |elf_file| { + const atom_index = try self.owner.getSymbolIndex(self); + const sym_index = try elf_file.getGlobalSymbol(decl_name, lib_name); + _ = try self.addInst(.{ + .tag = .call, + .ops = .extern_fn_reloc, + .data = .{ .reloc = .{ + .atom_index = atom_index, + .sym_index = sym_index, + } }, + }); + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name); _ = try self.addInst(.{ diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 050772b2ce..51820740dc 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -41,7 +41,15 @@ pub fn emitMir(emit: *Emit) Error!void { .offset = end_offset - 4, .length = @as(u5, @intCast(end_offset - start_offset)), }), - .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.Elf)) |elf_file| { + // Add relocation to the decl; the 4-byte displacement being patched is the last 4 bytes of the instruction. + const atom_ptr = elf_file.symbol(symbol.atom_index).atom(elf_file).?; + try atom_ptr.addReloc(elf_file, .{ + .r_offset = end_offset - 4, + .r_info = (@as(u64, @intCast(symbol.sym_index)) << 32) | std.elf.R_X86_64_PLT32, + .r_addend = -4, + }); + } else if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. 
const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; const target = macho_file.getGlobalByIndex(symbol.sym_index); diff --git a/src/link.zig b/src/link.zig index 634f9679c4..c7609eabb7 100644 --- a/src/link.zig +++ b/src/link.zig @@ -549,7 +549,7 @@ pub const File = struct { switch (base.tag) { // zig fmt: off .coff => return @fieldParentPtr(Coff, "base", base).getGlobalSymbol(name, lib_name), - .elf => unreachable, + .elf => return @fieldParentPtr(Elf, "base", base).getGlobalSymbol(name, lib_name), .macho => return @fieldParentPtr(MachO, "base", base).getGlobalSymbol(name, lib_name), .plan9 => unreachable, .spirv => unreachable, diff --git a/src/link/Elf.zig b/src/link/Elf.zig index e9b804801f..a71f70a1a1 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -302,9 +302,9 @@ pub fn getDeclVAddr(self: *Elf, decl_index: Module.Decl.Index, reloc_info: link. const vaddr = this_sym.value; const parent_atom = self.symbol(reloc_info.parent_atom_index).atom(self).?; try parent_atom.addReloc(self, .{ - .target = this_sym_index, - .offset = reloc_info.offset, - .addend = reloc_info.addend, + .r_offset = reloc_info.offset, + .r_info = (@as(u64, @intCast(this_sym_index)) << 32) | elf.R_X86_64_64, + .r_addend = reloc_info.addend, }); return vaddr; @@ -1020,8 +1020,17 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node // Beyond this point, everything has been allocated a virtual address and we can resolve // the relocations. 
if (self.zig_module_index) |index| { - for (self.file(index).?.zig_module.atoms.items) |atom_index| { - try self.atom(atom_index).?.resolveRelocs(self); + for (self.file(index).?.zig_module.atoms.keys()) |atom_index| { + const atom_ptr = self.atom(atom_index).?; + if (!atom_ptr.alive) continue; + const shdr = &self.shdrs.items[atom_ptr.output_section_index]; + const file_offset = shdr.sh_offset + atom_ptr.value - shdr.sh_addr; + const code = try gpa.alloc(u8, atom_ptr.size); + defer gpa.free(code); + const amt = try self.base.file.?.preadAll(code, file_offset); + if (amt != code.len) return error.InputOutput; + try atom_ptr.resolveRelocs(self, code); + try self.base.file.?.pwriteAll(code, file_offset); } } @@ -2185,6 +2194,7 @@ fn updateDeclCode( const shdr_index = sym.output_section_index; sym.name_offset = try self.strtab.insert(gpa, decl_name); + atom_ptr.alive = true; atom_ptr.name_offset = sym.name_offset; esym.st_name = sym.name_offset; esym.st_info |= stt_bits; @@ -2440,6 +2450,7 @@ fn updateLazySymbol(self: *Elf, sym: link.File.LazySymbol, symbol_index: Symbol. 
local_esym.st_info |= elf.STT_OBJECT; local_esym.st_size = code.len; const atom_ptr = local_sym.atom(self).?; + atom_ptr.alive = true; atom_ptr.name_offset = name_str_index; atom_ptr.alignment = math.log2_int(u64, required_alignment); atom_ptr.size = code.len; @@ -2515,6 +2526,7 @@ pub fn lowerUnnamedConst(self: *Elf, typed_value: TypedValue, decl_index: Module local_esym.st_info |= elf.STT_OBJECT; local_esym.st_size = code.len; const atom_ptr = local_sym.atom(self).?; + atom_ptr.alive = true; atom_ptr.name_offset = name_str_index; atom_ptr.alignment = math.log2_int(u64, required_alignment); atom_ptr.size = code.len; @@ -3374,6 +3386,17 @@ pub fn globalByName(self: *Elf, name: []const u8) ?Symbol.Index { return self.resolver.get(name_off); } +pub fn getGlobalSymbol(self: *Elf, name: []const u8, lib_name: ?[]const u8) !u32 { + _ = lib_name; + const gpa = self.base.allocator; + const name_off = try self.strtab.insert(gpa, name); + const gop = try self.getOrPutGlobal(name_off); + if (!gop.found_existing) { + try self.unresolved.putNoClobber(gpa, name_off, {}); + } + return gop.index; +} + fn dumpState(self: *Elf) std.fmt.Formatter(fmtDumpState) { return .{ .data = self }; } diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index 69950f9f32..dacbb369df 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -26,7 +26,7 @@ relocs_section_index: Index = 0, atom_index: Index = 0, /// Specifies whether this atom is alive or has been garbage collected. -alive: bool = true, +alive: bool = false, /// Specifies if the atom has been visited during garbage collection. 
visited: bool = false, @@ -192,6 +192,7 @@ pub fn free(self: *Atom, elf_file: *Elf) void { log.debug("freeAtom {d} ({s})", .{ self.atom_index, self.name(elf_file) }); const gpa = elf_file.base.allocator; + const zig_module = elf_file.file(self.file_index).?.zig_module; const shndx = self.output_section_index; const meta = elf_file.last_atom_and_free_list_table.getPtr(shndx).?; const free_list = &meta.free_list; @@ -242,17 +243,18 @@ pub fn free(self: *Atom, elf_file: *Elf) void { // TODO create relocs free list self.freeRelocs(elf_file); + assert(zig_module.atoms.swapRemove(self.atom_index)); self.* = .{}; } -pub fn relocs(self: Atom, elf_file: *Elf) []const Relocation { +pub fn relocs(self: Atom, elf_file: *Elf) []const elf.Elf64_Rela { const file_ptr = elf_file.file(self.file_index).?; if (file_ptr != .zig_module) @panic("TODO"); const zig_module = file_ptr.zig_module; return zig_module.relocs.items[self.relocs_section_index].items; } -pub fn addReloc(self: Atom, elf_file: *Elf, reloc: Relocation) !void { +pub fn addReloc(self: Atom, elf_file: *Elf, reloc: elf.Elf64_Rela) !void { const gpa = elf_file.base.allocator; const file_ptr = elf_file.file(self.file_index).?; assert(file_ptr == .zig_module); @@ -269,31 +271,178 @@ pub fn freeRelocs(self: Atom, elf_file: *Elf) void { } /// TODO mark relocs dirty -pub fn resolveRelocs(self: Atom, elf_file: *Elf) !void { +pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void { relocs_log.debug("0x{x}: {s}", .{ self.value, self.name(elf_file) }); - const shdr = &elf_file.shdrs.items[self.output_section_index]; - for (self.relocs(elf_file)) |reloc| { - const target_sym = elf_file.symbol(reloc.target); - const target_vaddr = target_sym.value + reloc.addend; - const section_offset = (self.value + reloc.offset) - shdr.sh_addr; - const file_offset = shdr.sh_offset + section_offset; - relocs_log.debug(" ({x}: [() => 0x{x}] ({s}))", .{ - reloc.offset, - target_vaddr, - target_sym.name(elf_file), + var stream = 
std.io.fixedBufferStream(code); + const cwriter = stream.writer(); + + for (self.relocs(elf_file)) |rel| { + const r_type = rel.r_type(); + if (r_type == elf.R_X86_64_NONE) continue; + + const target = elf_file.symbol(rel.r_sym()); + + // We will use equation format to resolve relocations: + // https://intezer.com/blog/malware-analysis/executable-and-linkable-format-101-part-3-relocations/ + // + // Address of the place being relocated (atom address plus relocation offset). + const P = @as(i64, @intCast(self.value + rel.r_offset)); + // Addend from the relocation. + const A = rel.r_addend; + // Address of the target symbol - can be address of the symbol within an atom or address of PLT stub. + const S = @as(i64, @intCast(target.address(.{}, elf_file))); + // Address of the global offset table. + const GOT = blk: { + const shndx = if (elf_file.got_plt_section_index) |shndx| + shndx + else if (elf_file.got_section_index) |shndx| + shndx + else + null; + break :blk if (shndx) |index| @as(i64, @intCast(elf_file.shdrs.items[index].sh_addr)) else 0; + }; + // Relative offset to the start of the global offset table. + const G = @as(i64, @intCast(target.gotAddress(elf_file))) - GOT; + // // Address of the thread pointer. + // const TP = @as(i64, @intCast(elf_file.getTpAddress())); + // // Address of the dynamic thread pointer. 
+ // const DTP = @as(i64, @intCast(elf_file.getDtpAddress())); + + relocs_log.debug(" {s}: {x}: [{x} => {x}] G({x}) ({s})", .{ + fmtRelocType(r_type), + rel.r_offset, + P, + S + A, + G + GOT + A, + target.name(elf_file), }); - switch (elf_file.ptr_width) { - .p32 => try elf_file.base.file.?.pwriteAll( - std.mem.asBytes(&@as(u32, @intCast(target_vaddr))), - file_offset, - ), - .p64 => try elf_file.base.file.?.pwriteAll(std.mem.asBytes(&target_vaddr), file_offset), + try stream.seekTo(rel.r_offset); + + switch (rel.r_type()) { + elf.R_X86_64_NONE => unreachable, + elf.R_X86_64_64 => try cwriter.writeIntLittle(i64, S + A), + elf.R_X86_64_PLT32 => try cwriter.writeIntLittle(i32, @as(i32, @intCast(S + A - P))), + else => @panic("TODO"), } } } +pub fn fmtRelocType(r_type: u32) std.fmt.Formatter(formatRelocType) { + return .{ .data = r_type }; +} + +fn formatRelocType( + r_type: u32, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const str = switch (r_type) { + elf.R_X86_64_NONE => "R_X86_64_NONE", + elf.R_X86_64_64 => "R_X86_64_64", + elf.R_X86_64_PC32 => "R_X86_64_PC32", + elf.R_X86_64_GOT32 => "R_X86_64_GOT32", + elf.R_X86_64_PLT32 => "R_X86_64_PLT32", + elf.R_X86_64_COPY => "R_X86_64_COPY", + elf.R_X86_64_GLOB_DAT => "R_X86_64_GLOB_DAT", + elf.R_X86_64_JUMP_SLOT => "R_X86_64_JUMP_SLOT", + elf.R_X86_64_RELATIVE => "R_X86_64_RELATIVE", + elf.R_X86_64_GOTPCREL => "R_X86_64_GOTPCREL", + elf.R_X86_64_32 => "R_X86_64_32", + elf.R_X86_64_32S => "R_X86_64_32S", + elf.R_X86_64_16 => "R_X86_64_16", + elf.R_X86_64_PC16 => "R_X86_64_PC16", + elf.R_X86_64_8 => "R_X86_64_8", + elf.R_X86_64_PC8 => "R_X86_64_PC8", + elf.R_X86_64_DTPMOD64 => "R_X86_64_DTPMOD64", + elf.R_X86_64_DTPOFF64 => "R_X86_64_DTPOFF64", + elf.R_X86_64_TPOFF64 => "R_X86_64_TPOFF64", + elf.R_X86_64_TLSGD => "R_X86_64_TLSGD", + elf.R_X86_64_TLSLD => "R_X86_64_TLSLD", + elf.R_X86_64_DTPOFF32 => "R_X86_64_DTPOFF32", + 
elf.R_X86_64_GOTTPOFF => "R_X86_64_GOTTPOFF", + elf.R_X86_64_TPOFF32 => "R_X86_64_TPOFF32", + elf.R_X86_64_PC64 => "R_X86_64_PC64", + elf.R_X86_64_GOTOFF64 => "R_X86_64_GOTOFF64", + elf.R_X86_64_GOTPC32 => "R_X86_64_GOTPC32", + elf.R_X86_64_GOT64 => "R_X86_64_GOT64", + elf.R_X86_64_GOTPCREL64 => "R_X86_64_GOTPCREL64", + elf.R_X86_64_GOTPC64 => "R_X86_64_GOTPC64", + elf.R_X86_64_GOTPLT64 => "R_X86_64_GOTPLT64", + elf.R_X86_64_PLTOFF64 => "R_X86_64_PLTOFF64", + elf.R_X86_64_SIZE32 => "R_X86_64_SIZE32", + elf.R_X86_64_SIZE64 => "R_X86_64_SIZE64", + elf.R_X86_64_GOTPC32_TLSDESC => "R_X86_64_GOTPC32_TLSDESC", + elf.R_X86_64_TLSDESC_CALL => "R_X86_64_TLSDESC_CALL", + elf.R_X86_64_TLSDESC => "R_X86_64_TLSDESC", + elf.R_X86_64_IRELATIVE => "R_X86_64_IRELATIVE", + elf.R_X86_64_RELATIVE64 => "R_X86_64_RELATIVE64", + elf.R_X86_64_GOTPCRELX => "R_X86_64_GOTPCRELX", + elf.R_X86_64_REX_GOTPCRELX => "R_X86_64_REX_GOTPCRELX", + elf.R_X86_64_NUM => "R_X86_64_NUM", + else => "R_X86_64_UNKNOWN", + }; + try writer.print("{s}", .{str}); +} + +pub fn format( + atom: Atom, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = atom; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format symbols directly"); +} + +pub fn fmt(atom: Atom, elf_file: *Elf) std.fmt.Formatter(format2) { + return .{ .data = .{ + .atom = atom, + .elf_file = elf_file, + } }; +} + +const FormatContext = struct { + atom: Atom, + elf_file: *Elf, +}; + +fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const atom = ctx.atom; + const elf_file = ctx.elf_file; + try writer.print("atom({d}) : {s} : @{x} : sect({d}) : align({x}) : size({x})", .{ + atom.atom_index, atom.name(elf_file), atom.value, + atom.output_section_index, atom.alignment, atom.size, + }); + // if (atom.fde_start != atom.fde_end) { + // try 
writer.writeAll(" : fdes{ "); + // for (atom.getFdes(elf_file), atom.fde_start..) |fde, i| { + // try writer.print("{d}", .{i}); + // if (!fde.alive) try writer.writeAll("([*])"); + // if (i < atom.fde_end - 1) try writer.writeAll(", "); + // } + // try writer.writeAll(" }"); + // } + const gc_sections = if (elf_file.base.options.gc_sections) |gc_sections| gc_sections else false; + if (gc_sections and !atom.alive) { + try writer.writeAll(" : [*]"); + } +} + pub const Index = u32; const std = @import("std"); @@ -306,4 +455,3 @@ const Allocator = std.mem.Allocator; const Atom = @This(); const Elf = @import("../Elf.zig"); const File = @import("file.zig").File; -const Relocation = @import("Relocation.zig"); diff --git a/src/link/Elf/Relocation.zig b/src/link/Elf/Relocation.zig deleted file mode 100644 index 719a211238..0000000000 --- a/src/link/Elf/Relocation.zig +++ /dev/null @@ -1,8 +0,0 @@ -target: Symbol.Index, -offset: u64, -addend: u32, - -const std = @import("std"); - -const Symbol = @import("Symbol.zig"); -const Relocation = @This(); diff --git a/src/link/Elf/ZigModule.zig b/src/link/Elf/ZigModule.zig index b7616edc81..760a71e14b 100644 --- a/src/link/Elf/ZigModule.zig +++ b/src/link/Elf/ZigModule.zig @@ -8,8 +8,8 @@ local_symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{}, elf_global_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, global_symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{}, -atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, -relocs: std.ArrayListUnmanaged(std.ArrayListUnmanaged(Relocation)) = .{}, +atoms: std.AutoArrayHashMapUnmanaged(Atom.Index, void) = .{}, +relocs: std.ArrayListUnmanaged(std.ArrayListUnmanaged(elf.Elf64_Rela)) = .{}, alive: bool = true, @@ -43,7 +43,7 @@ pub fn createAtom(self: *ZigModule, output_section_index: u16, elf_file: *Elf) ! 
const relocs = try self.relocs.addOne(gpa); relocs.* = .{}; atom_ptr.relocs_section_index = relocs_index; - try self.atoms.append(gpa, atom_index); + try self.atoms.putNoClobber(gpa, atom_index, {}); return symbol_index; } @@ -184,6 +184,28 @@ fn formatSymtab( } } +pub fn fmtAtoms(self: *ZigModule, elf_file: *Elf) std.fmt.Formatter(formatAtoms) { + return .{ .data = .{ + .self = self, + .elf_file = elf_file, + } }; +} + +fn formatAtoms( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" atoms\n"); + for (ctx.self.atoms.keys()) |atom_index| { + const atom = ctx.elf_file.atom(atom_index) orelse continue; + try writer.print(" {}\n", .{atom.fmt(ctx.elf_file)}); + } +} + const assert = std.debug.assert; const std = @import("std"); const elf = std.elf; @@ -193,6 +215,5 @@ const Atom = @import("Atom.zig"); const Elf = @import("../Elf.zig"); const File = @import("file.zig").File; const Module = @import("../../Module.zig"); -const Relocation = @import("Relocation.zig"); const Symbol = @import("Symbol.zig"); const ZigModule = @This();