From 729f822e311f3bce1e7bd99bcf71937145451a4c Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Tue, 13 Jun 2023 21:08:02 +0200 Subject: [PATCH 1/5] wasm-linker: correctly resolve exported symbols When compiling Zig code using the Wasm backend, we would previously incorrectly resolve exported symbols as it would not correctly remove existing symbols if they were to be overwritten. This meant that undefined symbols could cause collisions although they should be resolved by the exported symbol. --- src/link/Wasm.zig | 81 ++++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index fdac7dfa63..f593f95c3d 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1703,6 +1703,7 @@ pub fn updateDeclExports( const decl = mod.declPtr(decl_index); const atom_index = try wasm.getOrCreateAtomForDecl(decl_index); const atom = wasm.getAtom(atom_index); + const atom_sym = atom.symbolLoc().getSymbol(wasm).*; const gpa = mod.gpa; for (exports) |exp| { @@ -1716,43 +1717,21 @@ pub fn updateDeclExports( continue; } - const export_name = try wasm.string_table.put(wasm.base.allocator, mod.intern_pool.stringToSlice(exp.opts.name)); - if (wasm.globals.getPtr(export_name)) |existing_loc| { - if (existing_loc.index == atom.sym_index) continue; - const existing_sym: Symbol = existing_loc.getSymbol(wasm).*; - - const exp_is_weak = exp.opts.linkage == .Internal or exp.opts.linkage == .Weak; - // When both the to-be-exported symbol and the already existing symbol - // are strong symbols, we have a linker error. - // In the other case we replace one with the other. - if (!exp_is_weak and !existing_sym.isWeak()) { - try mod.failed_exports.put(gpa, exp, try Module.ErrorMsg.create( - gpa, - decl.srcLoc(mod), - \\LinkError: symbol '{}' defined multiple times - \\ first definition in '{s}' - \\ next definition in '{s}' - , - .{ exp.opts.name.fmt(&mod.intern_pool), wasm.name, wasm.name }, - )); - continue; - } else if (exp_is_weak) { - continue; // to-be-exported symbol is weak, so we keep the existing symbol - } else { - // TODO: Revisit this, why was this needed? - existing_loc.index = atom.sym_index; - existing_loc.file = null; - // exp.link.wasm.sym_index = existing_loc.index; - } - } - const exported_atom_index = try wasm.getOrCreateAtomForDecl(exp.exported_decl); const exported_atom = wasm.getAtom(exported_atom_index); + const export_name = try wasm.string_table.put(wasm.base.allocator, mod.intern_pool.stringToSlice(exp.opts.name)); const sym_loc = exported_atom.symbolLoc(); const symbol = sym_loc.getSymbol(wasm); + symbol.setGlobal(true); + symbol.setUndefined(false); + symbol.index = atom_sym.index; + symbol.tag = atom_sym.tag; + symbol.name = atom_sym.name; + switch (exp.opts.linkage) { .Internal => { symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN); + symbol.setFlag(.WASM_SYM_BINDING_WEAK); }, .Weak => { symbol.setFlag(.WASM_SYM_BINDING_WEAK); @@ -1768,22 +1747,52 @@ pub fn updateDeclExports( continue; }, } + + if (wasm.globals.get(export_name)) |existing_loc| { + if (existing_loc.index == atom.sym_index) continue; + const existing_sym: Symbol = existing_loc.getSymbol(wasm).*; + + if (!existing_sym.isUndefined()) blk: { + if (symbol.isWeak()) { + try wasm.discarded.put(wasm.base.allocator, existing_loc, sym_loc); + continue; // to-be-exported symbol is weak, so we keep the existing symbol + } + + // new symbol is not weak while existing is, replace existing symbol + if (existing_sym.isWeak()) { + break :blk; + } + // When both the to-be-exported symbol and the already existing symbol + // are strong symbols, we have a linker error. + // In the other case we replace one with the other. + try mod.failed_exports.put(gpa, exp, try Module.ErrorMsg.create( + gpa, + decl.srcLoc(mod), + \\LinkError: symbol '{}' defined multiple times + \\ first definition in '{s}' + \\ next definition in '{s}' + , + .{ exp.opts.name.fmt(&mod.intern_pool), wasm.name, wasm.name }, + )); + continue; + } + + // in this case the existing symbol must be replaced either because it's weak or undefined. + try wasm.discarded.put(wasm.base.allocator, existing_loc, sym_loc); + _ = wasm.imports.remove(existing_loc); + _ = wasm.undefs.swapRemove(existing_sym.name); + } + // Ensure the symbol will be exported using the given name if (!mod.intern_pool.stringEqlSlice(exp.opts.name, sym_loc.getName(wasm))) { try wasm.export_names.put(wasm.base.allocator, sym_loc, export_name); } - symbol.setGlobal(true); - symbol.setUndefined(false); try wasm.globals.put( wasm.base.allocator, export_name, sym_loc, ); - - // if the symbol was previously undefined, remove it as an import - _ = wasm.imports.remove(sym_loc); - _ = wasm.undefs.swapRemove(export_name); } } From 098b0b50ab3980a257ad0840034f21ef5349ac8b Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 14 Jun 2023 20:01:52 +0200 Subject: [PATCH 2/5] wasm: fix lowerParentPtr offsets The was incorrectly merged during internPool. This commit forward fixes that and reinstates the correct logic. --- src/arch/wasm/CodeGen.zig | 58 ++++++++------------------------------- 1 file changed, 12 insertions(+), 46 deletions(-) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 877db4b623..9a19ca439c 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -2938,49 +2938,31 @@ fn wrapOperand(func: *CodeGen, operand: WValue, ty: Type) InnerError!WValue { return WValue{ .stack = {} }; } -fn lowerParentPtr(func: *CodeGen, ptr_val: Value) InnerError!WValue { +fn lowerParentPtr(func: *CodeGen, ptr_val: Value, offset: u32) InnerError!WValue { const mod = func.bin_file.base.options.module.?; const ptr = mod.intern_pool.indexToKey(ptr_val.ip_index).ptr; switch (ptr.addr) { .decl => |decl_index| { - return func.lowerParentPtrDecl(ptr_val, decl_index, 0); + return func.lowerParentPtrDecl(ptr_val, decl_index, offset); }, .mut_decl => |mut_decl| { const decl_index = mut_decl.decl; - return func.lowerParentPtrDecl(ptr_val, decl_index, 0); - }, - .int, .eu_payload => |tag| return func.fail("TODO: Implement lowerParentPtr for {}", .{tag}), - .opt_payload => |base_ptr| { - return func.lowerParentPtr(base_ptr.toValue()); + return func.lowerParentPtrDecl(ptr_val, decl_index, offset); }, + .eu_payload => |tag| return func.fail("TODO: Implement lowerParentPtr for {}", .{tag}), + .int => |base| return func.lowerConstant(base.toValue(), Type.usize), + .opt_payload => |base_ptr| return func.lowerParentPtr(base_ptr.toValue(), offset), .comptime_field => unreachable, .elem => |elem| { const index = elem.index; const elem_type = mod.intern_pool.typeOf(elem.base).toType().elemType2(mod); - const offset = index * elem_type.abiSize(mod); - const array_ptr = try func.lowerParentPtr(elem.base.toValue()); - - return switch (array_ptr) { - .memory => |ptr_| WValue{ - .memory_offset = .{ - .pointer = ptr_, - .offset = @intCast(u32, offset), - }, - }, - .memory_offset => |mem_off| WValue{ - .memory_offset = .{ - .pointer = mem_off.pointer, - .offset = @intCast(u32, offset) + mem_off.offset, - }, - }, - else => unreachable, - }; + const elem_offset = index * elem_type.abiSize(mod); + return func.lowerParentPtr(elem.base.toValue(), @intCast(u32, elem_offset + offset)); }, .field => |field| { const parent_ty = mod.intern_pool.typeOf(field.base).toType().childType(mod); - const parent_ptr = try func.lowerParentPtr(field.base.toValue()); - const offset = switch (parent_ty.zigTypeTag(mod)) { + const field_offset = switch (parent_ty.zigTypeTag(mod)) { .Struct => switch (parent_ty.containerLayout(mod)) { .Packed => parent_ty.packedStructFieldByteOffset(@intCast(usize, field.index), mod), else => parent_ty.structFieldOffset(@intCast(usize, field.index), mod), @@ -2993,8 +2975,7 @@ fn lowerParentPtr(func: *CodeGen, ptr_val: Value) InnerError!WValue { if (layout.payload_align > layout.tag_align) break :blk 0; // tag is stored first so calculate offset from where payload starts - const offset = @intCast(u32, std.mem.alignForwardGeneric(u64, layout.tag_size, layout.tag_align)); - break :blk offset; + break :blk @intCast(u32, std.mem.alignForwardGeneric(u64, layout.tag_size, layout.tag_align)); }, }, .Pointer => switch (parent_ty.ptrSize(mod)) { @@ -3007,22 +2988,7 @@ fn lowerParentPtr(func: *CodeGen, ptr_val: Value) InnerError!WValue { }, else => unreachable, }; - - return switch (parent_ptr) { - .memory => |ptr_| WValue{ - .memory_offset = .{ - .pointer = ptr_, - .offset = @intCast(u32, offset), - }, - }, - .memory_offset => |mem_off| WValue{ - .memory_offset = .{ - .pointer = mem_off.pointer, - .offset = @intCast(u32, offset) + mem_off.offset, - }, - }, - else => unreachable, - }; + return func.lowerParentPtr(field.base.toValue(), @intCast(u32, offset + field_offset)); }, } } @@ -3230,7 +3196,7 @@ fn lowerConstant(func: *CodeGen, arg_val: Value, ty: Type) InnerError!WValue { .decl => |decl| return func.lowerDeclRefValue(.{ .ty = ty, .val = val }, decl, 0), .mut_decl => |mut_decl| return func.lowerDeclRefValue(.{ .ty = ty, .val = val }, mut_decl.decl, 0), .int => |int| return func.lowerConstant(int.toValue(), mod.intern_pool.typeOf(int).toType()), - .opt_payload, .elem, .field => return func.lowerParentPtr(val), + .opt_payload, .elem, .field => return func.lowerParentPtr(val, 0), else => return func.fail("Wasm TODO: lowerConstant for other const addr tag {}", .{ptr.addr}), }, .opt => if (ty.optionalReprIsPayload(mod)) { From 1cfad29f10a557df986fc940dcce7620bbd5d4d9 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Wed, 14 Jun 2023 20:03:01 +0200 Subject: [PATCH 3/5] codegen: fix union padding This regressed during the internpool merges. This commit reinstates the padding logic for unions. --- src/codegen.zig | 4 ++++ test/behavior/bugs/1381.zig | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/codegen.zig b/src/codegen.zig index b39c3c5ec0..cd1ed53307 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -598,6 +598,10 @@ pub fn generateSymbol( .fail => |em| return Result{ .fail = em }, } } + + if (layout.padding > 0) { + try code.writer().writeByteNTimes(0, layout.padding); + } }, .memoized_call => unreachable, } diff --git a/test/behavior/bugs/1381.zig b/test/behavior/bugs/1381.zig index f35c963df3..90941de341 100644 --- a/test/behavior/bugs/1381.zig +++ b/test/behavior/bugs/1381.zig @@ -17,7 +17,6 @@ test "union that needs padding bytes inside an array" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; var as = [_]A{ A{ .B = B{ .D = 1 } }, From e3db210cf1007f87930c97c072c54b2fb8ae0b8c Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Thu, 15 Jun 2023 19:30:00 +0200 Subject: [PATCH 4/5] wasm: support calling alias'd function pointers When lowering a decl value we verify whether its owner decl index equals to the decl index of the decl being lowered. When this is not the case, we are lowering an alias. So instead, we will now lower the owner decl instead and call its symbol to ensure its type is being correctly generated. --- src/arch/wasm/CodeGen.zig | 11 +++++++++++ test/behavior.zig | 6 +----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 9a19ca439c..aa44dc2bc8 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -3008,6 +3008,17 @@ fn lowerDeclRefValue(func: *CodeGen, tv: TypedValue, decl_index: Module.Decl.Ind } const decl = mod.declPtr(decl_index); + // check if decl is an alias to a function, in which case we + // want to lower the actual decl, rather than the alias itself. + if (decl.val.getFunction(mod)) |func_val| { + if (func_val.owner_decl != decl_index) { + return func.lowerDeclRefValue(tv, func_val.owner_decl, offset); + } + } else if (decl.val.getExternFunc(mod)) |func_val| { + if (func_val.decl != decl_index) { + return func.lowerDeclRefValue(tv, func_val.decl, offset); + } + } if (decl.ty.zigTypeTag(mod) != .Fn and !decl.ty.hasRuntimeBitsIgnoreComptime(mod)) { return WValue{ .imm32 = 0xaaaaaaaa }; } diff --git a/test/behavior.zig b/test/behavior.zig index bdc3f30ede..017b5b4824 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -158,6 +158,7 @@ test { _ = @import("behavior/enum.zig"); _ = @import("behavior/error.zig"); _ = @import("behavior/eval.zig"); + _ = @import("behavior/export_self_referential_type_info.zig"); _ = @import("behavior/field_parent_ptr.zig"); _ = @import("behavior/floatop.zig"); _ = @import("behavior/fn.zig"); @@ -241,7 +242,6 @@ test { if (builtin.zig_backend != .stage2_arm and builtin.zig_backend != .stage2_x86_64 and builtin.zig_backend != .stage2_aarch64 and - builtin.zig_backend != .stage2_wasm and builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_spirv64) { @@ -250,8 +250,4 @@ test { _ = @import("behavior/bugs/14198.zig"); _ = @import("behavior/export.zig"); } - - if (builtin.zig_backend != .stage2_wasm) { - _ = @import("behavior/export_self_referential_type_info.zig"); - } } From 44b322ce6410a0fab7c3cbdfc35bb1530a31a304 Mon Sep 17 00:00:00 2001 From: Luuk de Gram Date: Thu, 15 Jun 2023 20:15:01 +0200 Subject: [PATCH 5/5] wasm-linker: correctly resolve undefined functions We now resolve undefined symbols during incremental-compilation where we discard the current symbol if we detect we found an existing symbol which is not the one currently being updated. The symbol will always be discarded in favor of the existing symbol in such a case. --- src/link/Wasm.zig | 13 ++++++++++++- test/behavior/bugs/529.zig | 1 - 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index f593f95c3d..60944ff981 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1909,6 +1909,17 @@ pub fn addOrUpdateImport( global_gop.value_ptr.* = loc; try wasm.resolved_symbols.put(wasm.base.allocator, loc, {}); try wasm.undefs.putNoClobber(wasm.base.allocator, decl_name_index, loc); + } else if (global_gop.value_ptr.*.index != symbol_index) { + // We are not updating a symbol, but found an existing global + // symbol with the same name. This means we always favor the + // existing symbol, regardless whether it's defined or not. + // We can also skip storing the import as we will not output + // this symbol. + return wasm.discarded.put( + wasm.base.allocator, + .{ .file = null, .index = symbol_index }, + global_gop.value_ptr.*, + ); } if (type_index) |ty_index| { @@ -1924,8 +1935,8 @@ pub fn addOrUpdateImport( }; } } else { + // non-functions will not be imported from the runtime, but only resolved during link-time symbol.tag = .data; - return; // non-functions will not be imported from the runtime, but only resolved during link-time } } diff --git a/test/behavior/bugs/529.zig b/test/behavior/bugs/529.zig index a2e330055a..49a9b0a46d 100644 --- a/test/behavior/bugs/529.zig +++ b/test/behavior/bugs/529.zig @@ -11,7 +11,6 @@ comptime { const builtin = @import("builtin"); test "issue 529 fixed" { - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO