Merge pull request #16439 from Luukdegram/wasm-linker

wasm-linker: finish shared-memory & TLS implementation
This commit is contained in:
Luuk de Gram 2023-07-19 21:55:30 +02:00 committed by GitHub
commit cec1e973b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 217 additions and 63 deletions

View File

@ -1012,6 +1012,10 @@ const WasmDumper = struct {
const start = try std.leb.readULEB128(u32, reader);
try writer.print("\nstart {d}\n", .{start});
},
.data_count => {
const count = try std.leb.readULEB128(u32, reader);
try writer.print("\ncount {d}\n", .{count});
},
else => {}, // skip unknown sections
}
}
@ -1143,9 +1147,16 @@ const WasmDumper = struct {
.data => {
var i: u32 = 0;
while (i < entries) : (i += 1) {
const index = try std.leb.readULEB128(u32, reader);
const flags = try std.leb.readULEB128(u32, reader);
const index = if (flags & 0x02 != 0)
try std.leb.readULEB128(u32, reader)
else
0;
try writer.print("memory index 0x{x}\n", .{index});
try parseDumpInit(step, reader, writer);
if (flags == 0) {
try parseDumpInit(step, reader, writer);
}
const size = try std.leb.readULEB128(u32, reader);
try writer.print("size {d}\n", .{size});
try reader.skipBytes(size, .{}); // we do not care about the content of the segments
@ -1174,7 +1185,7 @@ const WasmDumper = struct {
}
fn parseDumpInit(step: *Step, reader: anytype, writer: anytype) !void {
const byte = try std.leb.readULEB128(u8, reader);
const byte = try reader.readByte();
const opcode = std.meta.intToEnum(std.wasm.Opcode, byte) catch {
return step.fail("invalid wasm opcode '{d}'", .{byte});
};

View File

@ -65,6 +65,7 @@ sanitize_thread: bool,
rdynamic: bool,
dwarf_format: ?std.dwarf.Format = null,
import_memory: bool = false,
export_memory: bool = false,
/// For WebAssembly targets, this will allow for undefined symbols to
/// be imported from the host environment.
import_symbols: bool = false,
@ -1662,6 +1663,9 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
if (self.import_memory) {
try zig_args.append("--import-memory");
}
if (self.export_memory) {
try zig_args.append("--export-memory");
}
if (self.import_symbols) {
try zig_args.append("--import-symbols");
}

View File

@ -125,6 +125,8 @@ exports: std.ArrayListUnmanaged(types.Export) = .{},
/// List of initialization functions. These must be called in order of priority
/// by the (synthetic) __wasm_call_ctors function.
init_funcs: std.ArrayListUnmanaged(InitFuncLoc) = .{},
/// Index to a function defining the entry of the wasm file
entry: ?u32 = null,
/// Indirect function table, used to call function pointers
/// When this is non-zero, we must emit a table entry,
@ -409,7 +411,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
},
);
} else {
symbol.index = @as(u32, @intCast(wasm_bin.imported_globals_count + wasm_bin.wasm_globals.items.len));
symbol.index = @intCast(wasm_bin.imported_globals_count + wasm_bin.wasm_globals.items.len);
symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
const global = try wasm_bin.wasm_globals.addOne(allocator);
global.* = .{
@ -432,7 +434,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
};
if (options.output_mode == .Obj or options.import_table) {
symbol.setUndefined(true);
symbol.index = @as(u32, @intCast(wasm_bin.imported_tables_count));
symbol.index = @intCast(wasm_bin.imported_tables_count);
wasm_bin.imported_tables_count += 1;
try wasm_bin.imports.put(allocator, loc, .{
.module_name = try wasm_bin.string_table.put(allocator, wasm_bin.host_name),
@ -466,19 +468,34 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
const loc = try wasm_bin.createSyntheticSymbol("__tls_base", .global);
const symbol = loc.getSymbol(wasm_bin);
symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
symbol.index = @intCast(wasm_bin.imported_globals_count + wasm_bin.wasm_globals.items.len);
try wasm_bin.wasm_globals.append(wasm_bin.base.allocator, .{
.global_type = .{ .valtype = .i32, .mutable = true },
.init = .{ .i32_const = undefined },
});
}
{
const loc = try wasm_bin.createSyntheticSymbol("__tls_size", .global);
const symbol = loc.getSymbol(wasm_bin);
symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
symbol.index = @intCast(wasm_bin.imported_globals_count + wasm_bin.wasm_globals.items.len);
try wasm_bin.wasm_globals.append(wasm_bin.base.allocator, .{
.global_type = .{ .valtype = .i32, .mutable = false },
.init = .{ .i32_const = undefined },
});
}
{
const loc = try wasm_bin.createSyntheticSymbol("__tls_align", .global);
const symbol = loc.getSymbol(wasm_bin);
symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
symbol.index = @intCast(wasm_bin.imported_globals_count + wasm_bin.wasm_globals.items.len);
try wasm_bin.wasm_globals.append(wasm_bin.base.allocator, .{
.global_type = .{ .valtype = .i32, .mutable = false },
.init = .{ .i32_const = undefined },
});
}
{
const loc = try wasm_bin.createSyntheticSymbol("__wasm_tls_init", .function);
const loc = try wasm_bin.createSyntheticSymbol("__wasm_init_tls", .function);
const symbol = loc.getSymbol(wasm_bin);
symbol.setFlag(.WASM_SYM_VISIBILITY_HIDDEN);
}
@ -844,6 +861,12 @@ fn resolveSymbolsInArchives(wasm: *Wasm) !void {
}
}
/// Emits an `i32.const` instruction to `writer`: the opcode byte followed by
/// `val`, bit-reinterpreted as a signed 32-bit integer and encoded as signed LEB128.
fn writeI32Const(writer: anytype, val: u32) !void {
    // The immediate is written in signed form, so reinterpret the bits
    // of the unsigned input rather than converting its numeric value.
    const immediate: i32 = @bitCast(val);
    try writer.writeByte(std.wasm.opcode(.i32_const));
    try leb.writeILEB128(writer, immediate);
}
fn setupInitMemoryFunction(wasm: *Wasm) !void {
// Passive segments are used to avoid memory being reinitialized on each
// thread's instantiation. These passive segments are initialized and
@ -881,12 +904,9 @@ fn setupInitMemoryFunction(wasm: *Wasm) !void {
try writer.writeByte(std.wasm.block_empty); // block type
// atomically check
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, flag_address);
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, @as(u32, 0));
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, @as(u32, 1));
try writeI32Const(writer, flag_address);
try writeI32Const(writer, 0);
try writeI32Const(writer, 1);
try writer.writeByte(std.wasm.opcode(.atomics_prefix));
try leb.writeULEB128(writer, std.wasm.atomicsOpcode(.i32_atomic_rmw_cmpxchg));
try leb.writeULEB128(writer, @as(u32, 2)); // alignment
@ -910,24 +930,20 @@ fn setupInitMemoryFunction(wasm: *Wasm) !void {
// For non-BSS segments we do a memory.init. Both these
// instructions take as their first argument the destination
// address.
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, segment.offset);
try writeI32Const(writer, segment.offset);
if (wasm.base.options.shared_memory and std.mem.eql(u8, entry.key_ptr.*, ".tdata")) {
// When we initialize the TLS segment we also set the `__tls_base`
// global. This allows the runtime to use this static copy of the
// TLS data for the first/main thread.
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, segment.offset);
try writeI32Const(writer, segment.offset);
try writer.writeByte(std.wasm.opcode(.global_set));
const loc = wasm.findGlobalSymbol("__tls_base").?;
try leb.writeULEB128(writer, loc.getSymbol(wasm).index);
}
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, @as(u32, 0));
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, segment.size);
try writeI32Const(writer, 0);
try writeI32Const(writer, segment.size);
try writer.writeByte(std.wasm.opcode(.misc_prefix));
if (std.mem.eql(u8, entry.key_ptr.*, ".bss")) {
// fill bss segment with zeroes
@ -943,18 +959,15 @@ fn setupInitMemoryFunction(wasm: *Wasm) !void {
if (wasm.base.options.shared_memory) {
// we set the init memory flag to value '2'
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, flag_address);
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, @as(u32, 2));
try writeI32Const(writer, flag_address);
try writeI32Const(writer, 2);
try writer.writeByte(std.wasm.opcode(.atomics_prefix));
try leb.writeULEB128(writer, std.wasm.atomicsOpcode(.i32_atomic_store));
try leb.writeULEB128(writer, @as(u32, 2)); // alignment
try leb.writeULEB128(writer, @as(u32, 0)); // offset
// notify any waiters for segment initialization completion
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, flag_address);
try writeI32Const(writer, flag_address);
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeILEB128(writer, @as(i32, -1)); // number of waiters
try writer.writeByte(std.wasm.opcode(.atomics_prefix));
@ -969,12 +982,10 @@ fn setupInitMemoryFunction(wasm: *Wasm) !void {
// wait for thread to initialize memory segments
try writer.writeByte(std.wasm.opcode(.end)); // end $wait
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, flag_address);
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeULEB128(writer, @as(u32, 1)); // expected flag value
try writer.writeByte(std.wasm.opcode(.i32_const));
try leb.writeILEB128(writer, @as(i32, -1)); // timeout
try writeI32Const(writer, flag_address);
try writeI32Const(writer, 1); // expected flag value
try writer.writeByte(std.wasm.opcode(.i64_const));
try leb.writeILEB128(writer, @as(i64, -1)); // timeout
try writer.writeByte(std.wasm.opcode(.atomics_prefix));
try leb.writeULEB128(writer, std.wasm.atomicsOpcode(.memory_atomic_wait32));
try leb.writeULEB128(writer, @as(u32, 2)); // alignment
@ -2154,7 +2165,14 @@ fn allocateVirtualAddresses(wasm: *Wasm) void {
const segment_name = segment_info[symbol.index].outputName(merge_segment);
const segment_index = wasm.data_segments.get(segment_name).?;
const segment = wasm.segments.items[segment_index];
symbol.virtual_address = atom.offset + segment.offset;
// TLS symbols have their virtual address set relative to their own TLS segment,
// rather than the entire Data section.
if (symbol.hasFlag(.WASM_SYM_TLS)) {
symbol.virtual_address = atom.offset;
} else {
symbol.virtual_address = atom.offset + segment.offset;
}
}
}
@ -2168,7 +2186,7 @@ fn sortDataSegments(wasm: *Wasm) !void {
const SortContext = struct {
fn sort(_: void, lhs: []const u8, rhs: []const u8) bool {
return order(lhs) <= order(rhs);
return order(lhs) < order(rhs);
}
fn order(name: []const u8) u8 {
@ -2399,6 +2417,13 @@ pub fn createFunction(
return loc.index;
}
/// Records the function index of the `__wasm_init_memory` symbol in
/// `wasm.entry` when that symbol exists; otherwise `wasm.entry` is left untouched.
fn setupStartSection(wasm: *Wasm) !void {
    const init_memory_loc = wasm.findGlobalSymbol("__wasm_init_memory") orelse return;
    const init_memory_symbol = init_memory_loc.getSymbol(wasm);
    wasm.entry = init_memory_symbol.index;
}
fn initializeTLSFunction(wasm: *Wasm) !void {
if (!wasm.base.options.shared_memory) return;
@ -2420,7 +2445,7 @@ fn initializeTLSFunction(wasm: *Wasm) !void {
try leb.writeULEB128(writer, param_local);
const tls_base_loc = wasm.findGlobalSymbol("__tls_base").?;
try writer.writeByte(std.wasm.opcode(.global_get));
try writer.writeByte(std.wasm.opcode(.global_set));
try leb.writeULEB128(writer, tls_base_loc.getSymbol(wasm).index);
// load stack values for the bulk-memory operation
@ -2748,27 +2773,18 @@ fn setupMemory(wasm: *Wasm) !void {
if (mem.eql(u8, entry.key_ptr.*, ".tdata")) {
if (wasm.findGlobalSymbol("__tls_size")) |loc| {
const sym = loc.getSymbol(wasm);
sym.index = @as(u32, @intCast(wasm.wasm_globals.items.len)) + wasm.imported_globals_count;
try wasm.wasm_globals.append(wasm.base.allocator, .{
.global_type = .{ .valtype = .i32, .mutable = false },
.init = .{ .i32_const = @as(i32, @intCast(segment.size)) },
});
wasm.wasm_globals.items[sym.index - wasm.imported_globals_count].init.i32_const = @intCast(segment.size);
}
if (wasm.findGlobalSymbol("__tls_align")) |loc| {
const sym = loc.getSymbol(wasm);
sym.index = @as(u32, @intCast(wasm.wasm_globals.items.len)) + wasm.imported_globals_count;
try wasm.wasm_globals.append(wasm.base.allocator, .{
.global_type = .{ .valtype = .i32, .mutable = false },
.init = .{ .i32_const = @as(i32, @intCast(segment.alignment)) },
});
wasm.wasm_globals.items[sym.index - wasm.imported_globals_count].init.i32_const = @intCast(segment.alignment);
}
if (wasm.findGlobalSymbol("__tls_base")) |loc| {
const sym = loc.getSymbol(wasm);
sym.index = @as(u32, @intCast(wasm.wasm_globals.items.len)) + wasm.imported_globals_count;
try wasm.wasm_globals.append(wasm.base.allocator, .{
.global_type = .{ .valtype = .i32, .mutable = wasm.base.options.shared_memory },
.init = .{ .i32_const = if (wasm.base.options.shared_memory) @as(u32, 0) else @as(i32, @intCast(memory_ptr)) },
});
wasm.wasm_globals.items[sym.index - wasm.imported_globals_count].init.i32_const = if (wasm.base.options.shared_memory)
@as(i32, 0)
else
@as(i32, @intCast(memory_ptr));
}
}
@ -3323,6 +3339,7 @@ fn linkWithZld(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) l
try wasm.setupInitMemoryFunction();
try wasm.setupTLSRelocationsFunction();
try wasm.initializeTLSFunction();
try wasm.setupStartSection();
try wasm.setupExports();
try wasm.writeToFile(enabled_features, emit_features_count, arena);
@ -3460,6 +3477,7 @@ pub fn flushModule(wasm: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod
try wasm.setupInitMemoryFunction();
try wasm.setupTLSRelocationsFunction();
try wasm.initializeTLSFunction();
try wasm.setupStartSection();
try wasm.setupExports();
try wasm.writeToFile(enabled_features, emit_features_count, arena);
}
@ -3520,6 +3538,7 @@ fn writeToFile(
// Import section
const import_memory = wasm.base.options.import_memory or is_obj;
const export_memory = wasm.base.options.export_memory;
if (wasm.imports.count() != 0 or import_memory) {
const header_offset = try reserveVecSectionHeader(&binary_bytes);
@ -3622,7 +3641,7 @@ fn writeToFile(
}
// Export section
if (wasm.exports.items.len != 0 or !import_memory) {
if (wasm.exports.items.len != 0 or export_memory) {
const header_offset = try reserveVecSectionHeader(&binary_bytes);
for (wasm.exports.items) |exp| {
@ -3633,7 +3652,7 @@ fn writeToFile(
try leb.writeULEB128(binary_writer, exp.index);
}
if (!import_memory) {
if (export_memory) {
try leb.writeULEB128(binary_writer, @as(u32, @intCast("memory".len)));
try binary_writer.writeAll("memory");
try binary_writer.writeByte(std.wasm.externalKind(.memory));
@ -3645,11 +3664,22 @@ fn writeToFile(
header_offset,
.@"export",
@as(u32, @intCast(binary_bytes.items.len - header_offset - header_size)),
@as(u32, @intCast(wasm.exports.items.len)) + @intFromBool(!import_memory),
@as(u32, @intCast(wasm.exports.items.len)) + @intFromBool(export_memory),
);
section_count += 1;
}
if (wasm.entry) |entry_index| {
const header_offset = try reserveVecSectionHeader(&binary_bytes);
try writeVecSectionHeader(
binary_bytes.items,
header_offset,
.start,
@intCast(binary_bytes.items.len - header_offset - header_size),
entry_index,
);
}
// element section (function table)
if (wasm.function_table.count() > 0) {
const header_offset = try reserveVecSectionHeader(&binary_bytes);
@ -3683,7 +3713,7 @@ fn writeToFile(
}
// When the shared-memory option is enabled, we *must* emit the 'data count' section.
const data_segments_count = wasm.data_segments.count() - @intFromBool(wasm.data_segments.contains(".bss") and import_memory);
const data_segments_count = wasm.data_segments.count() - @intFromBool(wasm.data_segments.contains(".bss") and !import_memory);
if (data_segments_count != 0 and wasm.base.options.shared_memory) {
const header_offset = try reserveVecSectionHeader(&binary_bytes);
try writeVecSectionHeader(

View File

@ -174,14 +174,14 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa
return 0;
}
const va = @as(i64, @intCast(symbol.virtual_address));
return @as(u32, @intCast(va + relocation.addend));
return @intCast(va + relocation.addend);
},
.R_WASM_EVENT_INDEX_LEB => return symbol.index,
.R_WASM_SECTION_OFFSET_I32 => {
const target_atom_index = wasm_bin.symbol_atom.get(target_loc).?;
const target_atom = wasm_bin.getAtom(target_atom_index);
const rel_value = @as(i32, @intCast(target_atom.offset)) + relocation.addend;
return @as(u32, @intCast(rel_value));
const rel_value: i32 = @intCast(target_atom.offset);
return @intCast(rel_value + relocation.addend);
},
.R_WASM_FUNCTION_OFFSET_I32 => {
const target_atom_index = wasm_bin.symbol_atom.get(target_loc) orelse {
@ -189,13 +189,14 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa
};
const target_atom = wasm_bin.getAtom(target_atom_index);
const offset: u32 = 11 + Wasm.getULEB128Size(target_atom.size); // Header (11 bytes fixed-size) + body size (leb-encoded)
const rel_value = @as(i32, @intCast(target_atom.offset + offset)) + relocation.addend;
return @as(u32, @intCast(rel_value));
const rel_value: i32 = @intCast(target_atom.offset + offset);
return @intCast(rel_value + relocation.addend);
},
.R_WASM_MEMORY_ADDR_TLS_SLEB,
.R_WASM_MEMORY_ADDR_TLS_SLEB64,
=> {
@panic("TODO: Implement TLS relocations");
const va: i32 = @intCast(symbol.virtual_address);
return @intCast(va + relocation.addend);
},
}
}

View File

@ -353,9 +353,14 @@ fn Parser(comptime ReaderType: type) type {
var debug_names = std.ArrayList(u8).init(gpa);
errdefer {
while (relocatable_data.popOrNull()) |rel_data| {
gpa.free(rel_data.data[0..rel_data.size]);
} else relocatable_data.deinit();
// only free the inner contents of relocatable_data if we didn't
// assign it to the object yet.
if (parser.object.relocatable_data.len == 0) {
for (relocatable_data.items) |rel_data| {
gpa.free(rel_data.data[0..rel_data.size]);
}
relocatable_data.deinit();
}
gpa.free(debug_names.items);
debug_names.deinit();
}

View File

@ -0,0 +1,98 @@
const std = @import("std");
/// Build entry point: creates the "test" step, makes it the default step and
/// registers the object checks once per optimization mode.
pub fn build(b: *std.Build) void {
    const test_step = b.step("test", "Test");
    b.default_step = test_step;

    // Same modes, in the same order, as calling `add` four times by hand.
    const optimize_modes = [_]std.builtin.OptimizeMode{
        .Debug,
        .ReleaseFast,
        .ReleaseSmall,
        .ReleaseSafe,
    };
    for (optimize_modes) |optimize_mode| {
        add(b, test_step, optimize_mode);
    }
}
/// Builds `lib.zig` as a wasm32-freestanding shared library (LLD disabled,
/// memory both imported and exported, shared memory enabled) for the given
/// `optimize_mode`, attaches object-file expectations to it and makes
/// `test_step` depend on those checks.
fn add(b: *std.Build, test_step: *std.Build.Step, optimize_mode: std.builtin.OptimizeMode) void {
    // Release modes have the TLS segment optimized out in this test case, so
    // several expectations below only apply to Debug builds.
    const is_debug = optimize_mode == .Debug;

    const lib = b.addSharedLibrary(.{
        .name = "lib",
        .root_source_file = .{ .path = "lib.zig" },
        .target = .{
            .cpu_arch = .wasm32,
            .cpu_model = .{ .explicit = &std.Target.wasm.cpu.mvp },
            .cpu_features_add = std.Target.wasm.featureSet(&.{ .atomics, .bulk_memory }),
            .os_tag = .freestanding,
        },
        .optimize = optimize_mode,
    });
    lib.use_lld = false;
    lib.strip = false;
    lib.import_memory = true;
    lib.export_memory = true;
    lib.shared_memory = true;
    lib.max_memory = 67108864;
    lib.single_threaded = false;
    lib.export_symbol_names = &.{"foo"};

    const check_lib = lib.checkObject();

    // The memory must be imported...
    check_lib.checkStart("Section import");
    check_lib.checkNext("entries 1");
    check_lib.checkNext("module env");
    check_lib.checkNext("name memory");

    // ...and exported again as well.
    check_lib.checkStart("Section export");
    check_lib.checkNext("entries 2");
    check_lib.checkNext("name memory");

    if (is_debug) {
        // The "start" section *must* be emitted, as the start function is set
        // to the index of __wasm_init_memory. Without a TLS segment (release
        // modes) there is no __wasm_init_memory and hence no "start" section.
        check_lib.checkStart("Section start");

        // The "data_count" section is only emitted, and then *must* be
        // emitted, when shared-memory is enabled.
        check_lib.checkStart("Section data_count");
        check_lib.checkNext("count 3");
    }

    check_lib.checkStart("Section custom");
    check_lib.checkNext("name name");
    check_lib.checkNext("type function");
    if (is_debug) {
        check_lib.checkNext("name __wasm_init_memory");
    }
    check_lib.checkNext("name __wasm_init_tls");
    check_lib.checkNext("type global");

    // In debug mode the symbol __tls_base is resolved to an undefined symbol
    // from the object file, so it is placed differently than in release modes,
    // where the entire TLS segment is optimized away and __tls_base keeps its
    // original position.
    const global_names = if (is_debug)
        [_][]const u8{ "name __tls_size", "name __tls_align", "name __tls_base" }
    else
        [_][]const u8{ "name __tls_base", "name __tls_size", "name __tls_align" };
    for (global_names) |global_name| {
        check_lib.checkNext(global_name);
    }

    check_lib.checkNext("type data_segment");
    if (is_debug) {
        check_lib.checkNext("names 3");
        check_lib.checkNext("index 0");
        check_lib.checkNext("name .rodata");
        check_lib.checkNext("index 1");
        check_lib.checkNext("name .bss");
        check_lib.checkNext("index 2");
        check_lib.checkNext("name .tdata");
    }

    test_step.dependOn(&check_lib.step);
}

View File

@ -0,0 +1,5 @@
/// Thread-local variable with a non-zero initial value, written by `foo`.
/// NOTE(review): presumably this is what produces the TLS data segment that
/// the accompanying build script checks for — confirm against the linker test.
threadlocal var some_tls_global: u32 = 1;
/// Exported function that stores `2` into the thread-local `some_tls_global`.
export fn foo() void {
some_tls_global = 2;
}