diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index caddbbeaca..685528c05d 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -895,7 +895,7 @@ fn genFunc(self: *Self) InnerError!void {
     try prologue.append(.{ .tag = .i32_sub, .data = .{ .tag = {} } });
     // Get negative stack alignment
     try prologue.append(.{ .tag = .i32_const, .data = .{ .imm32 = @intCast(i32, self.stack_alignment) * -1 } });
-    // Bit and the value to get the new stack pointer to ensure the pointers are aligned with the abi alignment
+    // Bitwise-and the value to get the new stack pointer, ensuring the pointers are aligned to the ABI alignment
     try prologue.append(.{ .tag = .i32_and, .data = .{ .tag = {} } });
     // store the current stack pointer as the bottom, which will be used to calculate all stack pointer offsets
     try prologue.append(.{ .tag = .local_tee, .data = .{ .label = self.bottom_stack_value.local } });
@@ -1074,22 +1074,123 @@ fn toWasmBits(bits: u16) ?u16 {
 
-/// Performs a copy of bytes for a given type. Copying all bytes
-/// from rhs to lhs.
-///
-/// TODO: Perform feature detection and when bulk_memory is available,
-/// use wasm's mem.copy instruction.
-fn memCopy(self: *Self, ty: Type, lhs: WValue, rhs: WValue) !void {
-    const abi_size = ty.abiSize(self.target);
-    var offset: u32 = 0;
-    const lhs_base = lhs.offset();
-    const rhs_base = rhs.offset();
-    while (offset < abi_size) : (offset += 1) {
-        // get lhs' address to store the result
-        try self.emitWValue(lhs);
-        // load byte from rhs' adress
-        try self.emitWValue(rhs);
-        try self.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
-        // store the result in lhs (we already have its address on the stack)
-        try self.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
+/// Performs a copy of `len` bytes from `src` into `dst`.
+fn memcpy(self: *Self, dst: WValue, src: WValue, len: WValue) !void {
+    // When bulk_memory is enabled, we lower this to wasm's memory.copy instruction.
+    // If not, we lower it manually.
+    if (std.Target.wasm.featureSetHas(self.target.cpu.features, .bulk_memory)) {
+        switch (dst) {
+            .stack_offset => try self.emitWValue(try self.buildPointerOffset(dst, 0, .new)),
+            else => try self.emitWValue(dst),
+        }
+        switch (src) {
+            .stack_offset => try self.emitWValue(try self.buildPointerOffset(src, 0, .new)),
+            else => try self.emitWValue(src),
+        }
+        try self.emitWValue(len);
+        try self.addExtended(.memory_copy);
+        return;
+    }
+
+    // When the length is comptime-known rather than a runtime value, we can optimize the
+    // generated code by unrolling the copy during codegen instead of emitting a runtime loop into the binary.
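+    // For example, a `store` of a 16-byte struct reaches here with `len = .{ .imm32 = 16 }`
+    // and unrolls into 16 single-byte load/store pairs below.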
+    switch (len) {
+        .imm32, .imm64 => {
+            const length = switch (len) {
+                .imm32 => |val| val,
+                .imm64 => |val| val,
+                else => unreachable,
+            };
+            var offset: u32 = 0;
+            const lhs_base = dst.offset();
+            const rhs_base = src.offset();
+            while (offset < length) : (offset += 1) {
+                // get dst's address to store the result
+                try self.emitWValue(dst);
+                // load byte from src's address
+                try self.emitWValue(src);
+                switch (self.arch()) {
+                    .wasm32 => {
+                        try self.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
+                        try self.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
+                    },
+                    .wasm64 => {
+                        try self.addMemArg(.i64_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
+                        try self.addMemArg(.i64_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
+                    },
+                    else => unreachable,
+                }
+            }
+        },
+        else => {
+            // TODO: We should probably lower this to a call to compiler_rt
+            // But for now, we implement it manually
+            const offset = try self.allocLocal(Type.usize); // local for counter
+            // outer block to jump to when loop is done
+            try self.startBlock(.block, wasm.block_empty);
+            try self.startBlock(.loop, wasm.block_empty);
+
+            // loop condition (offset == length -> break)
+            {
+                try self.emitWValue(offset);
+                try self.emitWValue(len);
+                switch (self.arch()) {
+                    .wasm32 => try self.addTag(.i32_eq),
+                    .wasm64 => try self.addTag(.i64_eq),
+                    else => unreachable,
+                }
+                try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
+            }
+
+            // get dst ptr
+            {
+                try self.emitWValue(dst);
+                try self.emitWValue(offset);
+                switch (self.arch()) {
+                    .wasm32 => try self.addTag(.i32_add),
+                    .wasm64 => try self.addTag(.i64_add),
+                    else => unreachable,
+                }
+            }
+
+            // get src value and also store in dst
+            {
+                try self.emitWValue(src);
+                try self.emitWValue(offset);
+                switch (self.arch()) {
+                    .wasm32 => {
+                        try self.addTag(.i32_add);
+                        try self.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 });
+                        try self.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 });
+                    },
+                    .wasm64 => {
+                        try self.addTag(.i64_add);
+                        try self.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 });
+                        try self.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 });
+                    },
+                    else => unreachable,
+                }
+            }
+
+            // increment loop counter
+            {
+                try self.emitWValue(offset);
+                switch (self.arch()) {
+                    .wasm32 => {
+                        try self.addImm32(1);
+                        try self.addTag(.i32_add);
+                    },
+                    .wasm64 => {
+                        try self.addImm64(1);
+                        try self.addTag(.i64_add);
+                    },
+                    else => unreachable,
+                }
+                try self.addLabel(.local_set, offset.local);
+                try self.addLabel(.br, 0); // jump to start of loop
+            }
+            try self.endBlock(); // close off loop block
+            try self.endBlock(); // close off outer block
+        },
     }
 }
@@ -1298,6 +1399,8 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
         .wasm_memory_size => self.airWasmMemorySize(inst),
         .wasm_memory_grow => self.airWasmMemoryGrow(inst),
 
+        .memcpy => self.airMemcpy(inst),
+
         .add_sat,
         .sub_sat,
         .mul_sat,
@@ -1338,7 +1441,6 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
         .ptr_slice_len_ptr,
         .ptr_slice_ptr_ptr,
         .int_to_float,
-        .memcpy,
         .cmpxchg_weak,
         .cmpxchg_strong,
         .fence,
@@ -1520,7 +1622,8 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
                 return self.store(lhs, rhs, err_ty, 0);
             }
 
-            return self.memCopy(ty, lhs, rhs);
+            const len = @intCast(u32, ty.abiSize(self.target));
+            return self.memcpy(lhs, rhs, .{ .imm32 = len });
         },
         .Optional => {
             if (ty.isPtrLikeOptional()) {
@@ -1532,10 +1635,12 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
                 return self.store(lhs, rhs, Type.u8, 0);
             }
 
-            return self.memCopy(ty, lhs, rhs);
+            const len = @intCast(u32, ty.abiSize(self.target));
+            return self.memcpy(lhs, rhs, .{ .imm32 = len });
         },
         .Struct, .Array, .Union, .Vector => {
-            return self.memCopy(ty, lhs, rhs);
+            const len = @intCast(u32, ty.abiSize(self.target));
+            return self.memcpy(lhs, rhs, .{ .imm32 = len });
         },
         .Pointer => {
             if (ty.isSlice()) {
@@ -1550,7 +1655,8 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
             }
         },
         .Int => if (ty.intInfo(self.target).bits > 64) {
-            return self.memCopy(ty, lhs, rhs);
+            const len = @intCast(u32, ty.abiSize(self.target));
+            return self.memcpy(lhs, rhs, .{ .imm32 = len });
         },
         else => {},
     }
@@ -2414,8 +2520,16 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
     if (!err_ty.errorUnionPayload().hasRuntimeBits()) return operand;
 
     const err_union = try self.allocStack(err_ty);
-    // TODO: Also write 'undefined' to the payload
     try self.store(err_union, operand, err_ty.errorUnionSet(), 0);
+
+    // write 'undefined' (0xaa bytes) to the payload
+    const err_align = err_ty.abiAlignment(self.target);
+    const set_size = err_ty.errorUnionSet().abiSize(self.target);
+    const offset = mem.alignForwardGeneric(u64, set_size, err_align);
+    const payload_ptr = try self.buildPointerOffset(err_union, offset, .new);
+    const len = @intCast(u32, err_ty.errorUnionPayload().abiSize(self.target));
+    try self.memset(payload_ptr, .{ .imm32 = len }, .{ .imm32 = 0xaaaaaaaa });
+
     return err_union;
 }
@@ -2867,7 +2981,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
     const ptr = try self.resolveInst(pl_op.operand);
     const value = try self.resolveInst(bin_op.lhs);
     const len = try self.resolveInst(bin_op.rhs);
-    try self.memSet(ptr, len, value);
+    try self.memset(ptr, len, value);
 
     return WValue{ .none = {} };
 }
@@ -2876,7 +2990,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
 /// When the user has enabled the bulk_memory feature, we lower
 /// this to wasm's memset instruction. When the feature is not present,
 /// we implement it manually.
-fn memSet(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void {
+fn memset(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void {
     // When bulk_memory is enabled, we lower it to wasm's memset instruction.
     // If not, we lower it ourselves
     if (std.Target.wasm.featureSetHas(self.target.cpu.features, .bulk_memory)) {
@@ -2890,45 +3004,74 @@ fn memSet(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void
         return;
     }
 
-    // TODO: We should probably lower this to a call to compiler_rt
-    // But for now, we implement it manually
-    const offset = try self.allocLocal(Type.usize); // local for counter
-    // outer block to jump to when loop is done
-    try self.startBlock(.block, wasm.block_empty);
-    try self.startBlock(.loop, wasm.block_empty);
-    try self.emitWValue(offset);
-    try self.emitWValue(len);
-    switch (self.ptrSize()) {
-        4 => try self.addTag(.i32_eq),
-        8 => try self.addTag(.i64_eq),
-        else => unreachable,
+    // When the length is comptime-known, we do the loop at codegen rather
+    // than emitting a runtime loop into the binary.
+    switch (len) {
+        .imm32, .imm64 => {
+            const length = switch (len) {
+                .imm32 => |val| val,
+                .imm64 => |val| val,
+                else => unreachable,
+            };
+
+            var offset: u32 = 0;
+            const base = ptr.offset();
+            while (offset < length) : (offset += 1) {
+                try self.emitWValue(ptr);
+                try self.emitWValue(value);
+                switch (self.arch()) {
+                    .wasm32 => {
+                        try self.addMemArg(.i32_store8, .{ .offset = base + offset, .alignment = 1 });
+                    },
+                    .wasm64 => {
+                        try self.addMemArg(.i64_store8, .{ .offset = base + offset, .alignment = 1 });
+                    },
+                    else => unreachable,
+                }
+            }
+        },
+        else => {
+            // TODO: We should probably lower this to a call to compiler_rt
+            // But for now, we implement it manually
+            const offset = try self.allocLocal(Type.usize); // local for counter
+            // outer block to jump to when loop is done
+            try self.startBlock(.block, wasm.block_empty);
+            try self.startBlock(.loop, wasm.block_empty);
+            try self.emitWValue(offset);
+            try self.emitWValue(len);
+            switch (self.arch()) {
+                .wasm32 => try self.addTag(.i32_eq),
+                .wasm64 => try self.addTag(.i64_eq),
+                else => unreachable,
+            }
+            try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
+            try self.emitWValue(ptr);
+            try self.emitWValue(offset);
+            switch (self.arch()) {
+                .wasm32 => try self.addTag(.i32_add),
+                .wasm64 => try self.addTag(.i64_add),
+                else => unreachable,
+            }
+            try self.emitWValue(value);
+            const mem_store_op: Mir.Inst.Tag = switch (self.arch()) {
+                .wasm32 => .i32_store8,
+                .wasm64 => .i64_store8,
+                else => unreachable,
+            };
+            try self.addMemArg(mem_store_op, .{ .offset = ptr.offset(), .alignment = 1 });
+            try self.emitWValue(offset);
+            switch (self.arch()) {
+                .wasm32 => {
+                    try self.addImm32(1);
+                    try self.addTag(.i32_add);
+                },
+                .wasm64 => {
+                    try self.addImm64(1);
+                    try self.addTag(.i64_add);
+                },
+                else => unreachable,
+            }
+            try self.addLabel(.local_set, offset.local);
+            try self.addLabel(.br, 0); // jump to start of loop
+            try self.endBlock();
+            try self.endBlock();
+        },
     }
-    try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
-    try self.emitWValue(ptr);
-    try self.emitWValue(offset);
-    switch (self.arch()) {
-        .wasm32 => try self.addTag(.i32_add),
-        .wasm64 => try self.addTag(.i64_add),
-        else => unreachable,
-    }
-    try self.emitWValue(value);
-    const mem_store_op: Mir.Inst.Tag = switch (self.arch()) {
-        .wasm32 => .i32_store8,
-        .wasm64 => .i64_store8,
-        else => unreachable,
-    };
-    try self.addMemArg(mem_store_op, .{ .offset = ptr.offset(), .alignment = 1 });
-    try self.emitWValue(offset);
-    try self.addImm32(1);
-    switch (self.ptrSize()) {
-        4 => try self.addTag(.i32_add),
-        8 => try self.addTag(.i64_add),
-        else => unreachable,
-    }
-    try self.addLabel(.local_set, offset.local);
-    try self.addLabel(.br, 0); // jump to start of loop
-    try self.endBlock();
-    try self.endBlock();
 }
 
 fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
@@ -3309,3 +3452,13 @@ fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
     try self.addLabel(.local_set, base.local);
     return base;
 }
+
+fn airMemcpy(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
+    const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+    const bin_op = self.air.extraData(Air.Bin, pl_op.payload).data;
+    const dst = try self.resolveInst(pl_op.operand);
+    const src = try self.resolveInst(bin_op.lhs);
+    const len = try self.resolveInst(bin_op.rhs);
+    try self.memcpy(dst, src, len);
+    return WValue{ .none = {} };
+}
diff --git a/test/behavior/basic.zig b/test/behavior/basic.zig
index f22e93008c..bb3232c01c 100644
--- a/test/behavior/basic.zig
+++ b/test/behavior/basic.zig
@@ -340,7 +340,6 @@ fn f2(x: bool) []const u8 {
 test "memcpy and memset intrinsics" {
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
-    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
 
     try testMemcpyMemset();
     // TODO add comptime test coverage
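
For reviewers who want to poke at the two lowering strategies, here is a minimal sketch of the kind of test this change unblocks. It is not part of the patch; it assumes the stage2-era `@memcpy(dest, source, byte_count)` builtin signature and a wasm32 target. With the `bulk_memory` feature enabled, both copies should lower to a single `memory.copy`; without it, the first takes the unrolled path (comptime-known length) and the second takes the runtime block/loop path.

```zig
const std = @import("std");

test "memcpy with comptime-known length" {
    var src = [_]u8{ 0xde, 0xad, 0xbe, 0xef };
    var dst: [4]u8 = undefined;
    // `src.len` is comptime-known, so without bulk_memory the backend
    // unrolls this into four load8/store8 pairs during codegen.
    @memcpy(@ptrCast([*]u8, &dst), @ptrCast([*]const u8, &src), src.len);
    try std.testing.expectEqualSlices(u8, &src, &dst);
}

test "memcpy with runtime-known length" {
    var src = [_]u8{ 0xde, 0xad, 0xbe, 0xef };
    var dst: [4]u8 = undefined;
    var len: usize = src.len;
    // `len` is a runtime value here, so without bulk_memory the backend
    // emits the block/loop byte-copy instead of unrolling.
    @memcpy(@ptrCast([*]u8, &dst), @ptrCast([*]const u8, &src), len);
    try std.testing.expectEqualSlices(u8, &src, &dst);
}
```

Which path is taken follows the `featureSetHas(..., .bulk_memory)` check in `memcpy()`/`memset()` above; something like `-target wasm32-freestanding -mcpu=generic+bulk_memory` (flag usage illustrative) should exercise the `memory.copy`/`memory.fill` lowering.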