From baeff1762b90fc9a4cd4b1d6a7db6ba43fd35356 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 13 Apr 2022 13:50:35 +0200 Subject: [PATCH 1/5] stage2,x64: recursively mark decls as alive when lowering --- src/arch/x86_64/CodeGen.zig | 39 ++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index fb79097d54..71074edc2d 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3903,17 +3903,17 @@ fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const operand = pl_op.operand; const ty = self.air.typeOf(operand); + const mcv = try self.resolveInst(operand); - if (!self.liveness.operandDies(inst, 0)) { - const mcv = try self.resolveInst(operand); - const name = self.air.nullTerminatedString(pl_op.payload); + log.debug("airDbgVar: %{d}: {}, {}", .{ inst, ty.fmtDebug(), mcv }); - const tag = self.air.instructions.items(.tag)[inst]; - switch (tag) { - .dbg_var_ptr => try self.genVarDbgInfo(ty.childType(), mcv, name), - .dbg_var_val => try self.genVarDbgInfo(ty, mcv, name), - else => unreachable, - } + const name = self.air.nullTerminatedString(pl_op.payload); + + const tag = self.air.instructions.items(.tag)[inst]; + switch (tag) { + .dbg_var_ptr => try self.genVarDbgInfo(ty.childType(), mcv, name), + .dbg_var_val => try self.genVarDbgInfo(ty, mcv, name), + else => unreachable, } return self.finishAir(inst, .dead, .{ operand, .none, .none }); @@ -6089,6 +6089,7 @@ fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCV } fn lowerDeclRef(self: *Self, tv: TypedValue, decl: *Module.Decl) InnerError!MCValue { + log.debug("lowerDeclRef: ty = {}, val = {}", .{ tv.ty.fmtDebug(), tv.val.fmtDebug() }); const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); @@ -6100,7 +6101,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl: *Module.Decl) InnerError!MCVa } } - decl.alive = true; + decl.markAlive(); + if (self.bin_file.cast(link.File.Elf)) |elf_file| { const got = &elf_file.program_headers.items[elf_file.phdr_got_index.?]; const got_addr = got.p_vaddr + decl.link.elf.offset_table_index * ptr_bytes; @@ -6120,8 +6122,6 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl: *Module.Decl) InnerError!MCVa } else { return self.fail("TODO codegen non-ELF const Decl pointer", .{}); } - - _ = tv; } fn lowerUnnamedConst(self: *Self, tv: TypedValue) InnerError!MCValue { @@ -6144,6 +6144,7 @@ fn lowerUnnamedConst(self: *Self, tv: TypedValue) InnerError!MCValue { } fn genTypedValue(self: *Self, typed_value: TypedValue) InnerError!MCValue { + log.debug("genTypedValue: ty = {}, val = {}", .{ typed_value.ty.fmtDebug(), typed_value.val.fmtDebug() }); if (typed_value.val.isUndef()) return MCValue{ .undef = {} }; const ptr_bits = self.target.cpu.arch.ptrBitWidth(); @@ -6181,8 +6182,6 @@ fn genTypedValue(self: *Self, typed_value: TypedValue) InnerError!MCValue { .Bool => { return MCValue{ .immediate = @boolToInt(typed_value.val.toBool()) }; }, - .ComptimeInt => unreachable, // semantic analysis prevents this - .ComptimeFloat => unreachable, // semantic analysis prevents this .Optional => { if (typed_value.ty.isPtrLikeOptional()) { if (typed_value.val.isNull()) @@ -6243,6 +6242,18 @@ fn genTypedValue(self: *Self, typed_value: TypedValue) InnerError!MCValue { } } }, + + .ComptimeInt => unreachable, + .ComptimeFloat => unreachable, + .Type => unreachable, + .EnumLiteral => unreachable, + .Void => unreachable, + .NoReturn => unreachable, + .Undefined => unreachable, + .Null => unreachable, + .BoundFn => unreachable, + .Opaque => unreachable, + else => {}, } From 4c50a27d682d3241ec73c7130d69d1d2f2553b86 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 13 Apr 2022 14:31:04 +0200 Subject: [PATCH 2/5] stage2,x64: generate debug info for local vars at hardcoded mem addr --- src/arch/x86_64/CodeGen.zig | 66 +++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 71074edc2d..a06f7941b4 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3926,31 +3926,21 @@ fn genVarDbgInfo( name: [:0]const u8, ) !void { const name_with_null = name.ptr[0 .. name.len + 1]; - switch (mcv) { - .register => |reg| { - switch (self.debug_output) { - .dwarf => |dw| { - const dbg_info = &dw.dbg_info; - try dbg_info.ensureUnusedCapacity(3); - dbg_info.appendAssumeCapacity(@enumToInt(link.File.Dwarf.AbbrevKind.variable)); + switch (self.debug_output) { + .dwarf => |dw| { + const dbg_info = &dw.dbg_info; + try dbg_info.append(@enumToInt(link.File.Dwarf.AbbrevKind.variable)); + + switch (mcv) { + .register => |reg| { + try dbg_info.ensureUnusedCapacity(2); dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc 1, // ULEB128 dwarf expression length reg.dwarfLocOp(), }); - try dbg_info.ensureUnusedCapacity(5 + name_with_null.len); - try self.addDbgInfoTypeReloc(ty); // DW.AT.type, DW.FORM.ref4 - dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string }, - .plan9 => {}, - .none => {}, - } - }, - .ptr_stack_offset, .stack_offset => |off| { - switch (self.debug_output) { - .dwarf => |dw| { - const dbg_info = &dw.dbg_info; - try dbg_info.ensureUnusedCapacity(8); - dbg_info.appendAssumeCapacity(@enumToInt(link.File.Dwarf.AbbrevKind.variable)); + .ptr_stack_offset, .stack_offset => |off| { + try dbg_info.ensureUnusedCapacity(7); const fixup = dbg_info.items.len; dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc 1, // we will backpatch it after we encode the displacement in LEB128 @@ -3958,18 +3948,36 @@ fn genVarDbgInfo( }); leb128.writeILEB128(dbg_info.writer(), -off) catch unreachable; dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); - try dbg_info.ensureUnusedCapacity(5 + name_with_null.len); - try self.addDbgInfoTypeReloc(ty); // DW.AT.type, DW.FORM.ref4 - dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string - }, - .plan9 => {}, - .none => {}, + .memory => |addr| { + const endian = self.target.cpu.arch.endian(); + const ptr_width = @intCast(u8, @divExact(self.target.cpu.arch.ptrBitWidth(), 8)); + try dbg_info.ensureUnusedCapacity(2 + ptr_width); + dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc + 1 + ptr_width, + DW.OP.addr, // literal address + }); + switch (ptr_width) { + 0...4 => { + try dbg_info.writer().writeInt(u32, @intCast(u32, addr), endian); + }, + 5...8 => { + try dbg_info.writer().writeInt(u64, addr, endian); + }, + else => unreachable, + } + }, + else => { + log.debug("TODO generate debug info for {}", .{mcv}); + }, } + + try dbg_info.ensureUnusedCapacity(5 + name_with_null.len); + try self.addDbgInfoTypeReloc(ty); // DW.AT.type, DW.FORM.ref4 + dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string }, - else => { - log.debug("TODO generate debug info for {}", .{mcv}); - }, + .plan9 => {}, + .none => {}, } } From 0bf72833307bf500ebec1a08641600f12cda0434 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 13 Apr 2022 16:22:24 +0200 Subject: [PATCH 3/5] dwarf: gen debug info for arrays --- src/codegen.zig | 2 -- src/link/Dwarf.zig | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index 2ace45c8cb..68e1f3697f 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -203,7 +203,6 @@ pub fn generateSymbol( }, .Array => switch (typed_value.val.tag()) { .bytes => { - // TODO populate .debug_info for the array const payload = typed_value.val.castTag(.bytes).?; const len = @intCast(usize, typed_value.ty.arrayLenIncludingSentinel()); // The bytes payload already includes the sentinel, if any @@ -212,7 +211,6 @@ pub fn generateSymbol( return Result{ .appended = {} }; }, .aggregate => { - // TODO populate .debug_info for the array const elem_vals = typed_value.val.castTag(.aggregate).?.data; const elem_ty = typed_value.ty.elemType(); const len = @intCast(usize, typed_value.ty.arrayLenIncludingSentinel()); diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index fc392bfe3e..bd7f429177 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -270,6 +270,27 @@ pub const DeclState = struct { try self.addTypeReloc(atom, ty.childType(), @intCast(u32, index), null); } }, + .Array => { + // DW.AT.array_type + try dbg_info_buffer.append(@enumToInt(AbbrevKind.array_type)); + // DW.AT.name, DW.FORM.string + try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(target)}); + // DW.AT.type, DW.FORM.ref4 + var index = dbg_info_buffer.items.len; + try dbg_info_buffer.resize(index + 4); + try self.addTypeReloc(atom, ty.childType(), @intCast(u32, index), null); + // DW.AT.subrange_type + try dbg_info_buffer.append(@enumToInt(AbbrevKind.array_dim)); + // DW.AT.type, DW.FORM.ref4 + index = dbg_info_buffer.items.len; + try dbg_info_buffer.resize(index + 4); + try self.addTypeReloc(atom, Type.usize, @intCast(u32, index), null); + // DW.AT.count, DW.FORM.udata + const len = ty.arrayLenIncludingSentinel(); + try leb128.writeULEB128(dbg_info_buffer.writer(), len); + // DW.AT.array_type delimit children + try dbg_info_buffer.append(0); + }, .Struct => blk: { // DW.AT.structure_type try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type)); @@ -564,6 +585,8 @@ pub const AbbrevKind = enum(u8) { pad1, parameter, variable, + array_type, + array_dim, }; /// The reloc offset for the virtual address of a function in its Line Number Program. @@ -1357,6 +1380,18 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { DW.AT.name, DW.FORM.string, 0, 0, // table sentinel + @enumToInt(AbbrevKind.array_type), + DW.TAG.array_type, DW.CHILDREN.yes, // header + DW.AT.name, DW.FORM.string, + DW.AT.type, DW.FORM.ref4, + 0, + 0, // table sentinel + @enumToInt(AbbrevKind.array_dim), + DW.TAG.subrange_type, DW.CHILDREN.no, // header + DW.AT.type, DW.FORM.ref4, + DW.AT.count, DW.FORM.udata, + 0, + 0, // table sentinel 0, 0, 0, // section sentinel From 3f912430bdddede8c3f6a9555b76499aa2dabb7e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 13 Apr 2022 16:24:56 +0200 Subject: [PATCH 4/5] stage2,x64: deref memory if referenced via GOT for local vars --- src/arch/x86_64/CodeGen.zig | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index a06f7941b4..32065fa1bb 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3911,8 +3911,8 @@ fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { const tag = self.air.instructions.items(.tag)[inst]; switch (tag) { - .dbg_var_ptr => try self.genVarDbgInfo(ty.childType(), mcv, name), - .dbg_var_val => try self.genVarDbgInfo(ty, mcv, name), + .dbg_var_ptr => try self.genVarDbgInfo(tag, ty.childType(), mcv, name), + .dbg_var_val => try self.genVarDbgInfo(tag, ty, mcv, name), else => unreachable, } @@ -3921,6 +3921,7 @@ fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { fn genVarDbgInfo( self: *Self, + tag: Air.Inst.Tag, ty: Type, mcv: MCValue, name: [:0]const u8, @@ -3952,9 +3953,14 @@ fn genVarDbgInfo( .memory => |addr| { const endian = self.target.cpu.arch.endian(); const ptr_width = @intCast(u8, @divExact(self.target.cpu.arch.ptrBitWidth(), 8)); + const is_ptr = switch (tag) { + .dbg_var_ptr => true, + .dbg_var_val => false, + else => unreachable, + }; try dbg_info.ensureUnusedCapacity(2 + ptr_width); dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1 + ptr_width, + 1 + ptr_width + @boolToInt(is_ptr), DW.OP.addr, // literal address }); switch (ptr_width) { @@ -3966,6 +3972,10 @@ fn genVarDbgInfo( }, else => unreachable, } + if (is_ptr) { + // We need deref the address as we point to the value via GOT entry. + try dbg_info.append(DW.OP.deref); + } }, else => { log.debug("TODO generate debug info for {}", .{mcv}); From edb428fae42ea82c49347fce6d48d80f1fed6ef1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 13 Apr 2022 19:05:19 +0200 Subject: [PATCH 5/5] macho,x64: resolve debug info relocs for RIP-based addressing Sometimes we will want to generate debug info for a constant that has been lowered to memory and not copied anywhere else. For this we will need to defer resolution on PIE platforms until all locals (including GOT entries) have been allocated. --- src/arch/x86_64/CodeGen.zig | 11 ++++++- src/link/Dwarf.zig | 43 +++++++++++++++++++++++++++ src/link/MachO.zig | 8 +++++ src/link/MachO/DebugSymbols.zig | 52 ++++++++++++++++++++++++++++++++- 4 files changed, 112 insertions(+), 2 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 32065fa1bb..53a6bfc4d9 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3950,7 +3950,7 @@ fn genVarDbgInfo( leb128.writeILEB128(dbg_info.writer(), -off) catch unreachable; dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); }, - .memory => |addr| { + .memory, .got_load, .direct_load => { const endian = self.target.cpu.arch.endian(); const ptr_width = @intCast(u8, @divExact(self.target.cpu.arch.ptrBitWidth(), 8)); const is_ptr = switch (tag) { @@ -3963,6 +3963,11 @@ fn genVarDbgInfo( 1 + ptr_width + @boolToInt(is_ptr), DW.OP.addr, // literal address }); + const offset = @intCast(u32, dbg_info.items.len); + const addr = switch (mcv) { + .memory => |addr| addr, + else => 0, + }; switch (ptr_width) { 0...4 => { try dbg_info.writer().writeInt(u32, @intCast(u32, addr), endian); @@ -3976,6 +3981,10 @@ fn genVarDbgInfo( // We need deref the address as we point to the value via GOT entry. try dbg_info.append(DW.OP.deref); } + switch (mcv) { + .got_load, .direct_load => |index| try dw.addExprlocReloc(index, offset, is_ptr), + else => {}, + } }, else => { log.debug("TODO generate debug info for {}", .{mcv}); diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index bd7f429177..248521c544 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -79,6 +79,7 @@ pub const DeclState = struct { std.hash_map.default_max_load_percentage, ) = .{}, abbrev_relocs: std.ArrayListUnmanaged(AbbrevRelocation) = .{}, + exprloc_relocs: std.ArrayListUnmanaged(ExprlocRelocation) = .{}, fn init(gpa: Allocator, target: std.Target) DeclState { return .{ @@ -97,6 +98,16 @@ pub const DeclState = struct { self.abbrev_table.deinit(self.gpa); self.abbrev_resolver.deinit(self.gpa); self.abbrev_relocs.deinit(self.gpa); + self.exprloc_relocs.deinit(self.gpa); + } + + pub fn addExprlocReloc(self: *DeclState, target: u32, offset: u32, is_ptr: bool) !void { + log.debug("{x}: target sym @{d}, via GOT {}", .{ offset, target, is_ptr }); + try self.exprloc_relocs.append(self.gpa, .{ + .@"type" = if (is_ptr) .got_load else .direct_load, + .target = target, + .offset = offset, + }); } pub fn addTypeReloc( @@ -549,6 +560,18 @@ pub const AbbrevRelocation = struct { addend: u32, }; +pub const ExprlocRelocation = struct { + /// Type of the relocation: direct load ref, or GOT load ref (via GOT table) + @"type": enum { + direct_load, + got_load, + }, + /// Index of the target in the linker's locals symbol table. + target: u32, + /// Offset within the debug info buffer where to patch up the address value. + offset: u32, +}; + pub const SrcFn = struct { /// Offset from the beginning of the Debug Line Program header that contains this function. off: u32, @@ -1009,6 +1032,26 @@ pub fn commitDeclState( } } + while (decl_state.exprloc_relocs.popOrNull()) |reloc| { + switch (self.tag) { + .macho => { + const macho_file = file.cast(File.MachO).?; + const d_sym = &macho_file.d_sym.?; + try d_sym.relocs.append(d_sym.base.base.allocator, .{ + .@"type" = switch (reloc.@"type") { + .direct_load => .direct_load, + .got_load => .got_load, + }, + .target = reloc.target, + .offset = reloc.offset + atom.off, + .addend = 0, + .prev_vaddr = 0, + }); + }, + else => unreachable, + } + } + try self.writeDeclDebugInfo(file, atom, dbg_info_buffer.items); } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b193068361..d359a3fd5d 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -3472,6 +3472,9 @@ pub fn closeFiles(self: MachO) void { for (self.dylibs.items) |dylib| { dylib.file.close(); } + if (self.d_sym) |ds| { + ds.file.close(); + } } fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) void { @@ -4274,6 +4277,11 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void { self.got_entries_free_list.append(self.base.allocator, @intCast(u32, got_index)) catch {}; self.got_entries.items[got_index] = .{ .target = .{ .local = 0 }, .atom = undefined }; _ = self.got_entries_table.swapRemove(.{ .local = decl.link.macho.local_sym_index }); + + if (self.d_sym) |*d_sym| { + d_sym.swapRemoveRelocs(decl.link.macho.local_sym_index); + } + log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, decl.link.macho.local_sym_index, diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 885f0ca6a8..aa7a29fcd1 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -59,6 +59,19 @@ debug_aranges_section_dirty: bool = false, debug_info_header_dirty: bool = false, debug_line_header_dirty: bool = false, +relocs: std.ArrayListUnmanaged(Reloc) = .{}, + +pub const Reloc = struct { + @"type": enum { + direct_load, + got_load, + }, + target: u32, + offset: u64, + addend: u32, + prev_vaddr: u64, +}; + /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void { @@ -254,6 +267,30 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti // Zig source code. const module = options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + for (self.relocs.items) |*reloc| { + const sym = switch (reloc.@"type") { + .direct_load => self.base.locals.items[reloc.target], + .got_load => blk: { + const got_index = self.base.got_entries_table.get(.{ .local = reloc.target }).?; + const got_entry = self.base.got_entries.items[got_index]; + break :blk self.base.locals.items[got_entry.atom.local_sym_index]; + }, + }; + if (sym.n_value == reloc.prev_vaddr) continue; + + const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; + const sect = &seg.sections.items[self.debug_info_section_index.?]; + const file_offset = sect.offset + reloc.offset; + log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{ + reloc.target, + sym.n_value, + self.base.getString(sym.n_strx), + file_offset, + }); + try self.file.pwriteAll(mem.asBytes(&sym.n_value), file_offset); + reloc.prev_vaddr = sym.n_value; + } + if (self.debug_abbrev_section_dirty) { try self.dwarf.writeDbgAbbrev(&self.base.base); self.load_commands_dirty = true; @@ -330,7 +367,20 @@ pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { } self.load_commands.deinit(allocator); self.dwarf.deinit(); - self.file.close(); + self.relocs.deinit(allocator); +} + +pub fn swapRemoveRelocs(self: *DebugSymbols, target: u32) void { + // TODO re-implement using a hashmap with free lists + var last_index: usize = 0; + while (last_index < self.relocs.items.len) { + const reloc = self.relocs.items[last_index]; + if (reloc.target == target) { + _ = self.relocs.swapRemove(last_index); + } else { + last_index += 1; + } + } } fn copySegmentCommand(