From 1fd708b1bc41878c33db7e2a50433ab1a66d41ee Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 4 Jan 2025 18:13:25 -0800 Subject: [PATCH] wasm linker: implement data relocs --- src/link/Wasm.zig | 193 ++++++++++++++++++++++++++++++++++------ src/link/Wasm/Flush.zig | 40 +++++---- 2 files changed, 187 insertions(+), 46 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index a6d1ade61d..0ccf3ecb7a 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -242,7 +242,7 @@ function_imports: std.AutoArrayHashMapUnmanaged(String, FunctionImportId) = .emp data_imports: std.AutoArrayHashMapUnmanaged(String, DataImportId) = .empty, /// Set of data symbols that will appear in the final binary. Used to populate /// `Flush.data_segments` before sorting. -data_segments: std.AutoArrayHashMapUnmanaged(DataId, void) = .empty, +data_segments: std.AutoArrayHashMapUnmanaged(DataSegmentId, void) = .empty, /// Ordered list of non-import globals that will appear in the final binary. /// Empty until prelink. @@ -1523,27 +1523,120 @@ pub const ObjectDataImport = extern struct { source_location: SourceLocation, pub const Resolution = enum(u32) { + unresolved, __zig_error_names, __zig_error_name_table, __heap_base, __heap_end, - unresolved = std.math.maxInt(u32), + /// Next, an `ObjectData.Index`. + /// Next, index into `uavs_obj` or `uavs_exe` depending on whether emitting an object. + /// Next, index into `navs_obj` or `navs_exe` depending on whether emitting an object. _, - comptime { - assert(@intFromEnum(Resolution.__zig_error_names) == @intFromEnum(DataId.__zig_error_names)); - assert(@intFromEnum(Resolution.__zig_error_name_table) == @intFromEnum(DataId.__zig_error_name_table)); - assert(@intFromEnum(Resolution.__heap_base) == @intFromEnum(DataId.__heap_base)); - assert(@intFromEnum(Resolution.__heap_end) == @intFromEnum(DataId.__heap_end)); + const first_object = @intFromEnum(Resolution.__heap_end) + 1; + + pub const Unpacked = union(enum) { + unresolved, + __zig_error_names, + __zig_error_name_table, + __heap_base, + __heap_end, + object: ObjectData.Index, + uav_exe: UavsExeIndex, + uav_obj: UavsObjIndex, + nav_exe: NavsExeIndex, + nav_obj: NavsObjIndex, + }; + + pub fn unpack(r: Resolution, wasm: *const Wasm) Unpacked { + return switch (r) { + .unresolved => .unresolved, + .__zig_error_names => .__zig_error_names, + .__zig_error_name_table => .__zig_error_name_table, + .__heap_base => .__heap_base, + .__heap_end => .__heap_end, + _ => { + const object_index = @intFromEnum(r) - first_object; + + const uav_index = if (object_index < wasm.object_datas.items.len) + return .{ .object = @enumFromInt(object_index) } + else + object_index - wasm.object_datas.items.len; + + const comp = wasm.base.comp; + const is_obj = comp.config.output_mode == .Obj; + if (is_obj) { + const nav_index = if (uav_index < wasm.uavs_obj.entries.len) + return .{ .uav_obj = @enumFromInt(uav_index) } + else + uav_index - wasm.uavs_obj.entries.len; + + return .{ .nav_obj = @enumFromInt(nav_index) }; + } else { + const nav_index = if (uav_index < wasm.uavs_exe.entries.len) + return .{ .uav_exe = @enumFromInt(uav_index) } + else + uav_index - wasm.uavs_exe.entries.len; + + return .{ .nav_exe = @enumFromInt(nav_index) }; + } + }, + }; } - pub fn toDataId(r: Resolution) ?DataId { - if (r == .unresolved) return null; - return @enumFromInt(@intFromEnum(r)); + pub fn pack(wasm: *const Wasm, unpacked: Unpacked) Resolution { + return switch (unpacked) { + .unresolved => .unresolved, + .__zig_error_names => .__zig_error_names, + .__zig_error_name_table => .__zig_error_name_table, + .__heap_base => .__heap_base, + .__heap_end => .__heap_end, + .object => |i| @enumFromInt(first_object + @intFromEnum(i)), + inline .uav_exe, .uav_obj => |i| @enumFromInt(first_object + wasm.object_datas.items.len + @intFromEnum(i)), + .nav_exe => |i| @enumFromInt(first_object + wasm.object_datas.items.len + wasm.uavs_exe.entries.len + @intFromEnum(i)), + .nav_obj => |i| @enumFromInt(first_object + wasm.object_datas.items.len + wasm.uavs_obj.entries.len + @intFromEnum(i)), + }; } pub fn fromObjectDataIndex(wasm: *const Wasm, object_data_index: ObjectData.Index) Resolution { - return @enumFromInt(@intFromEnum(DataId.pack(wasm, .{ .object = object_data_index.ptr(wasm).segment }))); + return pack(wasm, .{ .object = object_data_index }); + } + + pub fn objectDataSegment(r: Resolution, wasm: *const Wasm) ?ObjectDataSegment.Index { + return switch (unpack(r, wasm)) { + .unresolved => unreachable, + .object => |i| i.ptr(wasm).segment, + .__zig_error_names, + .__zig_error_name_table, + .__heap_base, + .__heap_end, + .uav_exe, + .uav_obj, + .nav_exe, + .nav_obj, + => null, + }; + } + + pub fn dataLoc(r: Resolution, wasm: *const Wasm) DataLoc { + return switch (unpack(r, wasm)) { + .unresolved => unreachable, + .object => |i| { + const ptr = i.ptr(wasm); + return .{ + .segment = .fromObjectDataSegment(wasm, ptr.segment), + .offset = ptr.offset, + }; + }, + .__zig_error_names => .{ .segment = .__zig_error_names, .offset = 0 }, + .__zig_error_name_table => .{ .segment = .__zig_error_name_table, .offset = 0 }, + .__heap_base => .{ .segment = .__heap_base, .offset = 0 }, + .__heap_end => .{ .segment = .__heap_end, .offset = 0 }, + .uav_exe => @panic("TODO"), + .uav_obj => @panic("TODO"), + .nav_exe => @panic("TODO"), + .nav_obj => @panic("TODO"), + }; } }; @@ -1583,7 +1676,7 @@ pub const DataPayload = extern struct { }; /// A reference to a local or exported global const. -pub const DataId = enum(u32) { +pub const DataSegmentId = enum(u32) { __zig_error_names, __zig_error_name_table, /// This and `__heap_end` are better retrieved via a global, but there is @@ -1596,7 +1689,7 @@ pub const DataId = enum(u32) { /// Next, index into `navs_obj` or `navs_exe` depending on whether emitting an object. _, - const first_object = @intFromEnum(DataId.__heap_end) + 1; + const first_object = @intFromEnum(DataSegmentId.__heap_end) + 1; pub const Category = enum { /// Thread-local variables. @@ -1620,7 +1713,7 @@ pub const DataId = enum(u32) { nav_obj: NavsObjIndex, }; - pub fn pack(wasm: *const Wasm, unpacked: Unpacked) DataId { + pub fn pack(wasm: *const Wasm, unpacked: Unpacked) DataSegmentId { return switch (unpacked) { .__zig_error_names => .__zig_error_names, .__zig_error_name_table => .__zig_error_name_table, @@ -1633,7 +1726,7 @@ pub const DataId = enum(u32) { }; } - pub fn unpack(id: DataId, wasm: *const Wasm) Unpacked { + pub fn unpack(id: DataSegmentId, wasm: *const Wasm) Unpacked { return switch (id) { .__zig_error_names => .__zig_error_names, .__zig_error_name_table => .__zig_error_name_table, @@ -1668,11 +1761,21 @@ pub const DataId = enum(u32) { }; } - pub fn fromObjectDataSegment(wasm: *const Wasm, object_data_segment: ObjectDataSegment.Index) DataId { + pub fn fromNav(wasm: *const Wasm, nav_index: InternPool.Nav.Index) DataSegmentId { + const comp = wasm.base.comp; + const is_obj = comp.config.output_mode == .Obj; + return pack(wasm, if (is_obj) .{ + .nav_obj = @enumFromInt(wasm.navs_obj.getIndex(nav_index).?), + } else .{ + .nav_exe = @enumFromInt(wasm.navs_exe.getIndex(nav_index).?), + }); + } + + pub fn fromObjectDataSegment(wasm: *const Wasm, object_data_segment: ObjectDataSegment.Index) DataSegmentId { return pack(wasm, .{ .object = object_data_segment }); } - pub fn category(id: DataId, wasm: *const Wasm) Category { + pub fn category(id: DataSegmentId, wasm: *const Wasm) Category { return switch (unpack(id, wasm)) { .__zig_error_names, .__zig_error_name_table, .__heap_base, .__heap_end => .data, .object => |i| { @@ -1693,7 +1796,7 @@ pub const DataId = enum(u32) { }; } - pub fn isTls(id: DataId, wasm: *const Wasm) bool { + pub fn isTls(id: DataSegmentId, wasm: *const Wasm) bool { return switch (unpack(id, wasm)) { .__zig_error_names, .__zig_error_name_table, .__heap_base, .__heap_end => false, .object => |i| i.ptr(wasm).flags.tls, @@ -1707,11 +1810,11 @@ pub const DataId = enum(u32) { }; } - pub fn isBss(id: DataId, wasm: *const Wasm) bool { + pub fn isBss(id: DataSegmentId, wasm: *const Wasm) bool { return id.category(wasm) == .zero; } - pub fn name(id: DataId, wasm: *const Wasm) []const u8 { + pub fn name(id: DataSegmentId, wasm: *const Wasm) []const u8 { return switch (unpack(id, wasm)) { .__zig_error_names, .__zig_error_name_table, .uav_exe, .uav_obj, .__heap_base, .__heap_end => ".data", .object => |i| i.ptr(wasm).name.unwrap().?.slice(wasm), @@ -1724,7 +1827,7 @@ pub const DataId = enum(u32) { }; } - pub fn alignment(id: DataId, wasm: *const Wasm) Alignment { + pub fn alignment(id: DataSegmentId, wasm: *const Wasm) Alignment { return switch (unpack(id, wasm)) { .__zig_error_names => .@"1", .__zig_error_name_table, .__heap_base, .__heap_end => wasm.pointerAlignment(), @@ -1752,7 +1855,7 @@ pub const DataId = enum(u32) { }; } - pub fn refCount(id: DataId, wasm: *const Wasm) u32 { + pub fn refCount(id: DataSegmentId, wasm: *const Wasm) u32 { return switch (unpack(id, wasm)) { .__zig_error_names => @intCast(wasm.error_name_offs.items.len), .__zig_error_name_table => wasm.error_name_table_ref_count, @@ -1761,7 +1864,7 @@ pub const DataId = enum(u32) { }; } - pub fn isPassive(id: DataId, wasm: *const Wasm) bool { + pub fn isPassive(id: DataSegmentId, wasm: *const Wasm) bool { const comp = wasm.base.comp; if (comp.config.import_memory and !id.isBss(wasm)) return true; return switch (unpack(id, wasm)) { @@ -1771,7 +1874,7 @@ pub const DataId = enum(u32) { }; } - pub fn isEmpty(id: DataId, wasm: *const Wasm) bool { + pub fn isEmpty(id: DataSegmentId, wasm: *const Wasm) bool { return switch (unpack(id, wasm)) { .__zig_error_names, .__zig_error_name_table, .__heap_base, .__heap_end => false, .object => |i| i.ptr(wasm).payload.off == .none, @@ -1779,7 +1882,7 @@ pub const DataId = enum(u32) { }; } - pub fn size(id: DataId, wasm: *const Wasm) u32 { + pub fn size(id: DataSegmentId, wasm: *const Wasm) u32 { return switch (unpack(id, wasm)) { .__zig_error_names => @intCast(wasm.error_name_bytes.items.len), .__zig_error_name_table => { @@ -1796,6 +1899,38 @@ pub const DataId = enum(u32) { } }; +pub const DataLoc = struct { + segment: Wasm.DataSegmentId, + offset: u32, + + pub fn fromObjectDataIndex(wasm: *const Wasm, i: Wasm.ObjectData.Index) DataLoc { + const ptr = i.ptr(wasm); + return .{ + .segment = .fromObjectDataSegment(wasm, ptr.segment), + .offset = ptr.offset, + }; + } + + pub fn fromDataImportId(wasm: *const Wasm, id: Wasm.DataImportId) DataLoc { + return switch (id.unpack(wasm)) { + .object_data_import => |i| .fromObjectDataImportIndex(wasm, i), + .zcu_import => |i| .fromZcuImport(wasm, i), + }; + } + + pub fn fromObjectDataImportIndex(wasm: *const Wasm, i: Wasm.ObjectDataImport.Index) DataLoc { + return i.value(wasm).resolution.dataLoc(wasm); + } + + pub fn fromZcuImport(wasm: *const Wasm, zcu_import: ZcuImportIndex) DataLoc { + const nav_index = zcu_import.ptr(wasm).*; + return .{ + .segment = .fromNav(wasm, nav_index), + .offset = 0, + }; + } +}; + /// Index into `Wasm.uavs`. pub const UavIndex = enum(u32) { _, @@ -3330,8 +3465,8 @@ fn markDataImport( } else { try wasm.data_imports.put(gpa, name, .fromObject(data_index, wasm)); } - } else { - try markDataSegment(wasm, import.resolution.toDataId().?.unpack(wasm).object); + } else if (import.resolution.objectDataSegment(wasm)) |segment_index| { + try markDataSegment(wasm, segment_index); } } @@ -4144,7 +4279,7 @@ pub fn uavAddr(wasm: *Wasm, uav_index: UavsExeIndex) u32 { assert(wasm.flush_buffer.memory_layout_finished); const comp = wasm.base.comp; assert(comp.config.output_mode != .Obj); - const ds_id: DataId = .pack(wasm, .{ .uav_exe = uav_index }); + const ds_id: DataSegmentId = .pack(wasm, .{ .uav_exe = uav_index }); return wasm.flush_buffer.data_segments.get(ds_id).?; } @@ -4155,7 +4290,7 @@ pub fn navAddr(wasm: *Wasm, nav_index: InternPool.Nav.Index) u32 { assert(comp.config.output_mode != .Obj); const navs_exe_index: NavsExeIndex = @enumFromInt(wasm.navs_exe.getIndex(nav_index).?); log.debug("navAddr {s} {}", .{ navs_exe_index.name(wasm), nav_index }); - const ds_id: DataId = .pack(wasm, .{ .nav_exe = navs_exe_index }); + const ds_id: DataSegmentId = .pack(wasm, .{ .nav_exe = navs_exe_index }); return wasm.flush_buffer.data_segments.get(ds_id).?; } diff --git a/src/link/Wasm/Flush.zig b/src/link/Wasm/Flush.zig index 68a3c3ac69..48bae7f890 100644 --- a/src/link/Wasm/Flush.zig +++ b/src/link/Wasm/Flush.zig @@ -22,7 +22,7 @@ const assert = std.debug.assert; /// Ordered list of data segments that will appear in the final binary. /// When sorted, to-be-merged segments will be made adjacent. /// Values are virtual address. -data_segments: std.AutoArrayHashMapUnmanaged(Wasm.DataId, u32) = .empty, +data_segments: std.AutoArrayHashMapUnmanaged(Wasm.DataSegmentId, u32) = .empty, /// Each time a `data_segment` offset equals zero it indicates a new group, and /// the next element in this array will contain the total merged segment size. /// Value is the virtual memory address of the end of the segment. @@ -228,7 +228,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void { // For the purposes of sorting, they are implicitly all named ".data". const Sort = struct { wasm: *const Wasm, - segments: []const Wasm.DataId, + segments: []const Wasm.DataSegmentId, pub fn lessThan(ctx: @This(), lhs: usize, rhs: usize) bool { const lhs_segment = ctx.segments[lhs]; const rhs_segment = ctx.segments[rhs]; @@ -311,7 +311,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void { const data_vaddr: u32 = @intCast(memory_ptr); { var seen_tls: enum { before, during, after } = .before; - var category: Wasm.DataId.Category = undefined; + var category: Wasm.DataSegmentId.Category = undefined; for (segment_ids, segment_vaddrs, 0..) |segment_id, *segment_vaddr, i| { const alignment = segment_id.alignment(wasm); category = segment_id.category(wasm); @@ -710,7 +710,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void { if (!is_obj) { for (wasm.uav_fixups.items) |uav_fixup| { - const ds_id: Wasm.DataId = .pack(wasm, .{ .uav_exe = uav_fixup.uavs_exe_index }); + const ds_id: Wasm.DataSegmentId = .pack(wasm, .{ .uav_exe = uav_fixup.uavs_exe_index }); const vaddr = f.data_segments.get(ds_id).?; if (!is64) { mem.writeInt(u32, wasm.string_bytes.items[uav_fixup.offset..][0..4], vaddr, .little); @@ -719,7 +719,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void { } } for (wasm.nav_fixups.items) |nav_fixup| { - const ds_id: Wasm.DataId = .pack(wasm, .{ .nav_exe = nav_fixup.navs_exe_index }); + const ds_id: Wasm.DataSegmentId = .pack(wasm, .{ .nav_exe = nav_fixup.navs_exe_index }); const vaddr = f.data_segments.get(ds_id).?; if (!is64) { mem.writeInt(u32, wasm.string_bytes.items[nav_fixup.offset..][0..4], vaddr, .little); @@ -867,7 +867,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void { fn emitNameSection( wasm: *Wasm, - data_segments: *const std.AutoArrayHashMapUnmanaged(Wasm.DataId, u32), + data_segments: *const std.AutoArrayHashMapUnmanaged(Wasm.DataSegmentId, u32), binary_bytes: *std.ArrayListUnmanaged(u8), ) !void { const f = &wasm.flush_buffer; @@ -1142,9 +1142,9 @@ fn splitSegmentName(name: []const u8) struct { []const u8, []const u8 } { fn wantSegmentMerge( wasm: *const Wasm, - a_id: Wasm.DataId, - b_id: Wasm.DataId, - b_category: Wasm.DataId.Category, + a_id: Wasm.DataSegmentId, + b_id: Wasm.DataSegmentId, + b_category: Wasm.DataSegmentId.Category, ) bool { const a_category = a_id.category(wasm); if (a_category != b_category) return false; @@ -1519,17 +1519,23 @@ const RelocAddr = struct { addr: u32, fn fromObjectData(wasm: *const Wasm, i: Wasm.ObjectData.Index, addend: i32) RelocAddr { - const ptr = i.ptr(wasm); - const f = &wasm.flush_buffer; - const addr = f.data_segments.get(.fromObjectDataSegment(wasm, ptr.segment)).?; - return .{ .addr = @intCast(@as(i64, addr) + addend) }; + return fromDataLoc(&wasm.flush_buffer, .fromObjectDataIndex(wasm, i), addend); } fn fromSymbolName(wasm: *const Wasm, name: String, addend: i32) RelocAddr { - _ = wasm; - _ = name; - _ = addend; - @panic("TODO implement data symbol resolution"); + const flush = &wasm.flush_buffer; + if (wasm.object_data_imports.getPtr(name)) |import| { + return fromDataLoc(flush, import.resolution.dataLoc(wasm), addend); + } else if (wasm.data_imports.get(name)) |id| { + return fromDataLoc(flush, .fromDataImportId(wasm, id), addend); + } else { + unreachable; + } + } + + fn fromDataLoc(flush: *const Flush, data_loc: Wasm.DataLoc, addend: i32) RelocAddr { + const base_addr: i64 = flush.data_segments.get(data_loc.segment).?; + return .{ .addr = @intCast(base_addr + data_loc.offset + addend) }; } };