From 917b4ad5e0b21d37e3b0f69dc0db02a62cf2e75b Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 1 Dec 2023 20:34:26 -0500 Subject: [PATCH 01/10] x86_64: implement more atomic ops --- src/arch/x86_64/CodeGen.zig | 105 ++++++++++++++++++++++++++++++++---- test/behavior/atomics.zig | 2 - 2 files changed, 96 insertions(+), 11 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 14d2449ea4..9aa044f77e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -14162,9 +14162,8 @@ fn atomicOp( }; defer if (mem_lock) |lock| self.register_manager.unlockReg(lock); - const method: enum { lock, loop, libcall } = if (val_ty.isRuntimeFloat()) - .loop - else switch (rmw_op orelse .Xchg) { + const use_sse = rmw_op orelse .Xchg != .Xchg and val_ty.isRuntimeFloat(); + const strat: enum { lock, loop, libcall } = if (use_sse) .loop else switch (rmw_op orelse .Xchg) { .Xchg, .Add, .Sub, @@ -14178,7 +14177,7 @@ fn atomicOp( .Min, => if (val_abi_size <= 16) .loop else .libcall, }; - switch (method) { + switch (strat) { .lock => { const tag: Mir.Inst.Tag = if (rmw_op) |op| switch (op) { .Xchg => if (unused) .mov else .xchg, @@ -14216,6 +14215,14 @@ fn atomicOp( return if (unused) .unreach else dst_mcv; }, .loop => _ = if (val_abi_size <= 8) { + const sse_reg: Register = if (use_sse) + try self.register_manager.allocReg(null, abi.RegisterClass.sse) + else + undefined; + const sse_lock = + if (use_sse) self.register_manager.lockRegAssumeUnused(sse_reg) else undefined; + defer if (use_sse) self.register_manager.unlockReg(sse_lock); + const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); const tmp_mcv = MCValue{ .register = tmp_reg }; const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); @@ -14223,10 +14230,67 @@ fn atomicOp( try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem); const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { + if (!use_sse and rmw_op orelse .Xchg != .Xchg) { try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax }); } - if (rmw_op) |op| switch (op) { + if (rmw_op) |op| if (use_sse) { + const mir_tag = @as(?Mir.Inst.FixedTag, switch (op) { + .Add => switch (val_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + else => null, + }, + .Sub => switch (val_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + else => null, + }, + .Min => switch (val_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, + else => null, + }, + .Max => switch (val_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + else => null, + }, + else => unreachable, + }) orelse return self.fail("TODO implement atomicOp of {s} for {}", .{ + @tagName(op), val_ty.fmt(mod), + }); + try self.genSetReg(sse_reg, val_ty, .{ .register = .rax }); + switch (mir_tag[0]) { + .v_ss, .v_sd => if (val_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, + sse_reg.to128(), + sse_reg.to128(), + try val_mcv.mem(self, self.memSize(val_ty)), + ) 
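// Without a memory operand, the value is brought into a register (copying to a temporary if needed).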
else try self.asmRegisterRegisterRegister( + mir_tag, + sse_reg.to128(), + sse_reg.to128(), + (if (val_mcv.isRegister()) + val_mcv.getReg().? + else + try self.copyToTmpRegister(val_ty, val_mcv)).to128(), + ), + ._ss, ._sd => if (val_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + sse_reg.to128(), + try val_mcv.mem(self, self.memSize(val_ty)), + ) else try self.asmRegisterRegister( + mir_tag, + sse_reg.to128(), + (if (val_mcv.isRegister()) + val_mcv.getReg().? + else + try self.copyToTmpRegister(val_ty, val_mcv)).to128(), + ), + else => unreachable, + } + try self.genSetReg(tmp_reg, val_ty, .{ .register = sse_reg }); + } else switch (op) { .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv), .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv), .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv), @@ -14362,9 +14426,32 @@ fn atomicOp( try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem); try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem); }, - else => return self.fail("TODO implement x86 atomic loop for {} {s}", .{ - val_ty.fmt(mod), @tagName(op), - }), + .Min, .Max => { + const cc: Condition = switch (if (val_ty.isAbiInt(mod)) + val_ty.intInfo(mod).signedness + else + .unsigned) { + .unsigned => switch (op) { + .Min => .a, + .Max => .b, + else => unreachable, + }, + .signed => switch (op) { + .Min => .g, + .Max => .l, + else => unreachable, + }, + }; + + const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .register = .rcx }); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterMemory(.{ ._, .cmp }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .sbb }, tmp_reg, val_hi_mem); + try self.asmCmovccRegisterMemory(cc, .rbx, val_lo_mem); + try self.asmCmovccRegisterMemory(cc, .rcx, val_hi_mem); + }, }; try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem); _ = try self.asmJccReloc(.ne, loop); diff --git a/test/behavior/atomics.zig b/test/behavior/atomics.zig index 158c931eb6..fa817dd639 100644 --- a/test/behavior/atomics.zig +++ b/test/behavior/atomics.zig @@ -208,7 +208,6 @@ fn testAtomicStore() !void { } test "atomicrmw with floats" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO @@ -316,7 +315,6 @@ test "atomicrmw with 128-bit ints" { if (!supports_128_bit_atomics) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO // TODO "ld.lld: undefined symbol: __sync_lock_test_and_set_16" on -mcpu x86_64 if (builtin.cpu.arch == .x86_64 and builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; From 014833b61fae62d75935606ff2946009c708fd58 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 1 Dec 2023 23:27:35 -0500 Subject: [PATCH 02/10] x86_64: implement more compliant vectors --- src/arch/x86_64/CodeGen.zig | 265 ++++++++++++++++++++++-------------- src/codegen.zig | 73 +++++++--- test/behavior/bitcast.zig | 1 - test/behavior/vector.zig | 1 - 4 files changed, 217 insertions(+), 123 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 9aa044f77e..1e62e8c720 100644 --- a/src/arch/x86_64/CodeGen.zig 
+++ b/src/arch/x86_64/CodeGen.zig @@ -5133,74 +5133,114 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const array_ty = self.typeOf(bin_op.lhs); - const array = try self.resolveInst(bin_op.lhs); - const array_lock: ?RegisterLock = switch (array) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (array_lock) |lock| self.register_manager.unlockReg(lock); + const result: MCValue = result: { + const array_ty = self.typeOf(bin_op.lhs); + const elem_ty = array_ty.childType(mod); - const elem_ty = array_ty.childType(mod); - const elem_abi_size = elem_ty.abiSize(mod); + const array_mcv = try self.resolveInst(bin_op.lhs); + const array_lock: ?RegisterLock = switch (array_mcv) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (array_lock) |lock| self.register_manager.unlockReg(lock); - const index_ty = self.typeOf(bin_op.rhs); - const index = try self.resolveInst(bin_op.rhs); - const index_lock: ?RegisterLock = switch (index) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (index_lock) |lock| self.register_manager.unlockReg(lock); + const index_ty = self.typeOf(bin_op.rhs); + const index_mcv = try self.resolveInst(bin_op.rhs); + const index_lock: ?RegisterLock = switch (index_mcv) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (index_lock) |lock| self.register_manager.unlockReg(lock); - const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_lock); + try self.spillEflagsIfOccupied(); + if (array_ty.isVector(mod) and elem_ty.toIntern() == .bool_type) { + const index_reg = switch (index_mcv) { + .register => |reg| reg, + else => try self.copyToTmpRegister(index_ty, index_mcv), + }; + switch (array_mcv) { + .register => |array_reg| try self.asmRegisterRegister( + .{ ._, .bt }, + array_reg.to64(), + index_reg.to64(), + ), + .load_frame => try self.asmMemoryRegister( + .{ ._, .bt }, + try array_mcv.mem(self, .qword), + index_reg.to64(), + ), + .memory, .load_symbol, .load_direct, .load_got, .load_tlv => try self.asmMemoryRegister( + .{ ._, .bt }, + .{ + .base = .{ + .reg = try self.copyToTmpRegister(Type.usize, array_mcv.address()), + }, + .mod = .{ .rm = .{ .size = .qword } }, + }, + index_reg.to64(), + ), + else => return self.fail("TODO airArrayElemVal for {s} of {}", .{ + @tagName(array_mcv), array_ty.fmt(mod), + }), + } - switch (array) { - .register => { - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, mod)); - try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array); - try self.asmRegisterMemory( + const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); + try self.asmSetccRegister(.c, dst_reg.to8()); + break :result .{ .register = dst_reg }; + } + + const elem_abi_size = elem_ty.abiSize(mod); + const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); + defer self.register_manager.unlockReg(addr_lock); + + switch (array_mcv) { + .register => { + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, mod)); + try self.genSetMem(.{ 
.frame = frame_index }, 0, array_ty, array_mcv); + try self.asmRegisterMemory( + .{ ._, .lea }, + addr_reg, + .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } }, + ); + }, + .load_frame => |frame_addr| try self.asmRegisterMemory( .{ ._, .lea }, addr_reg, - .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } }, - ); - }, - .load_frame => |frame_addr| try self.asmRegisterMemory( - .{ ._, .lea }, - addr_reg, - .{ - .base = .{ .frame = frame_addr.index }, - .mod = .{ .rm = .{ .size = .qword, .disp = frame_addr.off } }, - }, - ), - .memory, - .load_symbol, - .load_direct, - .load_got, - .load_tlv, - => try self.genSetReg(addr_reg, Type.usize, array.address()), - .lea_symbol, .lea_direct, .lea_tlv => unreachable, - else => return self.fail("TODO implement array_elem_val when array is {}", .{array}), - } + .{ + .base = .{ .frame = frame_addr.index }, + .mod = .{ .rm = .{ .size = .qword, .disp = frame_addr.off } }, + }, + ), + .memory, + .load_symbol, + .load_direct, + .load_got, + .load_tlv, + => try self.genSetReg(addr_reg, Type.usize, array_mcv.address()), + .lea_symbol, .lea_direct, .lea_tlv => unreachable, + else => return self.fail("TODO airArrayElemVal_val for {s} of {}", .{ + @tagName(array_mcv), array_ty.fmt(mod), + }), + } - const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size); - const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_lock); + const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size); + const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg); + defer self.register_manager.unlockReg(offset_lock); - // TODO we could allocate register here, but need to expect addr register and potentially - // offset register. - try self.spillEflagsIfOccupied(); - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genBinOpMir( - .{ ._, .add }, - Type.usize, - .{ .register = addr_reg }, - .{ .register = offset_reg }, - ); - try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }); - - return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); + // TODO we could allocate register here, but need to expect addr register and potentially + // offset register. 
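+ // For now: add the scaled element offset to addr_reg, then copy the element through it indirectly.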
+ const dst_mcv = try self.allocRegOrMem(inst, false); + try self.genBinOpMir( + .{ ._, .add }, + Type.usize, + .{ .register = addr_reg }, + .{ .register = offset_reg }, + ); + try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }); + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { @@ -5258,32 +5298,44 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; - const ptr_ty = self.typeOf(extra.lhs); - const ptr = try self.resolveInst(extra.lhs); - const ptr_lock: ?RegisterLock = switch (ptr) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, + const result = result: { + const elem_ptr_ty = self.typeOfIndex(inst); + const base_ptr_ty = self.typeOf(extra.lhs); + + const base_ptr_mcv = try self.resolveInst(extra.lhs); + const base_ptr_lock: ?RegisterLock = switch (base_ptr_mcv) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (base_ptr_lock) |lock| self.register_manager.unlockReg(lock); + + if (elem_ptr_ty.ptrInfo(mod).flags.vector_index != .none) { + break :result if (self.reuseOperand(inst, extra.lhs, 0, base_ptr_mcv)) + base_ptr_mcv + else + try self.copyToRegisterWithInstTracking(inst, elem_ptr_ty, base_ptr_mcv); + } + + const elem_ty = base_ptr_ty.elemType2(mod); + const elem_abi_size = elem_ty.abiSize(mod); + const index_ty = self.typeOf(extra.rhs); + const index_mcv = try self.resolveInst(extra.rhs); + const index_lock: ?RegisterLock = switch (index_mcv) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (index_lock) |lock| self.register_manager.unlockReg(lock); + + const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size); + const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); + defer self.register_manager.unlockReg(offset_reg_lock); + + const dst_mcv = try self.copyToRegisterWithInstTracking(inst, elem_ptr_ty, base_ptr_mcv); + try self.genBinOpMir(.{ ._, .add }, elem_ptr_ty, dst_mcv, .{ .register = offset_reg }); + + break :result dst_mcv; }; - defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); - - const elem_ty = ptr_ty.elemType2(mod); - const elem_abi_size = elem_ty.abiSize(mod); - const index_ty = self.typeOf(extra.rhs); - const index = try self.resolveInst(extra.rhs); - const index_lock: ?RegisterLock = switch (index) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (index_lock) |lock| self.register_manager.unlockReg(lock); - - const offset_reg = try self.elemOffset(index_ty, index, elem_abi_size); - const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_reg_lock); - - const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr); - try self.genBinOpMir(.{ ._, .add }, ptr_ty, dst_mcv, .{ .register = offset_reg }); - - return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); + return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); } fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { @@ -6712,19 +6764,20 @@ fn reuseOperandAdvanced( fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { const mod 
= self.bin_file.options.module.?; - const ptr_info = ptr_ty.ptrInfo(mod); + const ptr_info = ptr_ty.ptrInfo(mod); const val_ty = Type.fromInterned(ptr_info.child); if (!val_ty.hasRuntimeBitsIgnoreComptime(mod)) return; const val_abi_size: u32 = @intCast(val_ty.abiSize(mod)); - if (ptr_info.packed_offset.bit_offset % 8 == 0) { - try self.load( - dst_mcv, - ptr_ty, - ptr_mcv.offset(@intCast(@divExact(ptr_info.packed_offset.bit_offset, 8))), - ); - const val_bit_size: u32 = @intCast(val_ty.bitSize(mod)); + const val_bit_size: u32 = @intCast(val_ty.bitSize(mod)); + const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) { + .none => 0, + .runtime => unreachable, + else => |vector_index| @intFromEnum(vector_index) * val_bit_size, + }; + if (ptr_bit_off % 8 == 0) { + try self.load(dst_mcv, ptr_ty, ptr_mcv.offset(@intCast(@divExact(ptr_bit_off, 8)))); if (val_abi_size * 8 > val_bit_size) { if (dst_mcv.isRegister()) { try self.truncateRegister(val_ty, dst_mcv.getReg().?); @@ -6746,9 +6799,8 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn const limb_abi_size: u32 = @min(val_abi_size, 8); const limb_abi_bits = limb_abi_size * 8; - const val_byte_off: i32 = - @intCast(ptr_info.packed_offset.bit_offset / limb_abi_bits * limb_abi_size); - const val_bit_off = ptr_info.packed_offset.bit_offset % limb_abi_bits; + const val_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size); + const val_bit_off = ptr_bit_off % limb_abi_bits; const val_extra_bits = self.regExtraBits(val_ty); const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv); @@ -6875,7 +6927,8 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { else try self.allocRegOrMem(inst, true); - if (ptr_ty.ptrInfo(mod).packed_offset.host_size > 0) { + const ptr_info = ptr_ty.ptrInfo(mod); + if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) { try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv); } else { try self.load(dst_mcv, ptr_ty, ptr_mcv); @@ -6909,7 +6962,7 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void { const mod = self.bin_file.options.module.?; const ptr_info = ptr_ty.ptrInfo(mod); - const src_ty = ptr_ty.childType(mod); + const src_ty = Type.fromInterned(ptr_info.child); if (!src_ty.hasRuntimeBitsIgnoreComptime(mod)) return; const limb_abi_size: u16 = @min(ptr_info.packed_offset.host_size, 8); @@ -6917,8 +6970,13 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In const limb_ty = try mod.intType(.unsigned, limb_abi_bits); const src_bit_size = src_ty.bitSize(mod); - const src_byte_off: i32 = @intCast(ptr_info.packed_offset.bit_offset / limb_abi_bits * limb_abi_size); - const src_bit_off = ptr_info.packed_offset.bit_offset % limb_abi_bits; + const ptr_bit_off = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) { + .none => 0, + .runtime => unreachable, + else => |vector_index| @intFromEnum(vector_index) * src_bit_size, + }; + const src_byte_off: i32 = @intCast(ptr_bit_off / limb_abi_bits * limb_abi_size); + const src_bit_off = ptr_bit_off % limb_abi_bits; const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv); const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg); @@ -7055,11 +7113,14 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const ptr_mcv = try self.resolveInst(bin_op.lhs); const ptr_ty = self.typeOf(bin_op.lhs); const 
src_mcv = try self.resolveInst(bin_op.rhs); - if (ptr_ty.ptrInfo(mod).packed_offset.host_size > 0) { + + const ptr_info = ptr_ty.ptrInfo(mod); + if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) { try self.packedStore(ptr_ty, ptr_mcv, src_mcv); } else { try self.store(ptr_ty, ptr_mcv, src_mcv); } + return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -12777,7 +12838,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo else => { const classes = mem.sliceTo(&abi.classifySystemV(ty, mod, .other), .none); assert(std.mem.indexOfNone(abi.Class, classes, &.{ - .integer, .sse, .float, .float_combine, + .integer, .sse, .memory, .float, .float_combine, }) == null); const abi_size = ty.abiSize(mod); if (abi_size < 4 or diff --git a/src/codegen.zig b/src/codegen.zig index b3f896c616..f1758f978d 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -389,31 +389,66 @@ pub fn generateSymbol( }, }, .vector_type => |vector_type| { - switch (aggregate.storage) { - .bytes => |bytes| try code.appendSlice(bytes), - .elems, .repeated_elem => { - var index: u64 = 0; - while (index < vector_type.len) : (index += 1) { - switch (try generateSymbol(bin_file, src_loc, .{ - .ty = Type.fromInterned(vector_type.child), - .val = Value.fromInterned(switch (aggregate.storage) { - .bytes => unreachable, - .elems => |elems| elems[@as(usize, @intCast(index))], - .repeated_elem => |elem| elem, - }), - }, code, debug_output, reloc_info)) { - .ok => {}, - .fail => |em| return .{ .fail = em }, - } + const abi_size = math.cast(usize, typed_value.ty.abiSize(mod)) orelse + return error.Overflow; + switch (vector_type.child) { + .bool_type => { + const bytes = try code.addManyAsSlice(abi_size); + @memset(bytes, 0xaa); + var index: usize = 0; + const len = math.cast(usize, vector_type.len) orelse return error.Overflow; + while (index < len) : (index += 1) { + const bit_index = switch (endian) { + .big => len - 1 - index, + .little => index, + }; + const byte = &bytes[bit_index / 8]; + const mask = @as(u8, 1) << @truncate(bit_index); + if (switch (switch (aggregate.storage) { + .bytes => unreachable, + .elems => |elems| elems[index], + .repeated_elem => |elem| elem, + }) { + .bool_true => true, + .bool_false => false, + else => |elem| { + assert(mod.intern_pool.indexToKey(elem).undef == .bool_type); + continue; + }, + }) byte.* |= mask else byte.* &= ~mask; } }, + else => switch (aggregate.storage) { + .bytes => |bytes| try code.appendSlice(bytes), + .elems, .repeated_elem => { + var index: u64 = 0; + while (index < vector_type.len) : (index += 1) { + switch (try generateSymbol(bin_file, src_loc, .{ + .ty = Type.fromInterned(vector_type.child), + .val = Value.fromInterned(switch (aggregate.storage) { + .bytes => unreachable, + .elems => |elems| elems[ + math.cast(usize, index) orelse return error.Overflow + ], + .repeated_elem => |elem| elem, + }), + }, code, debug_output, reloc_info)) { + .ok => {}, + .fail => |em| return .{ .fail = em }, + } + } + }, + }, } - const padding = math.cast(usize, typed_value.ty.abiSize(mod) - - (math.divCeil(u64, Type.fromInterned(vector_type.child).bitSize(mod) * vector_type.len, 8) catch |err| switch (err) { + const padding = abi_size - (math.cast(usize, math.divCeil( + u64, + Type.fromInterned(vector_type.child).bitSize(mod) * vector_type.len, + 8, + ) catch |err| switch (err) { error.DivisionByZero => unreachable, else => |e| return e, - })) orelse return error.Overflow; + }) orelse return error.Overflow); if 
(padding > 0) try code.appendNTimes(0, padding); }, .anon_struct_type => |tuple| { diff --git a/test/behavior/bitcast.zig b/test/behavior/bitcast.zig index a6e281e2dc..e0c305d540 100644 --- a/test/behavior/bitcast.zig +++ b/test/behavior/bitcast.zig @@ -393,7 +393,6 @@ test "bitcast vector to integer and back" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 521fb7f6a2..8cb1a8bb66 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -1234,7 +1234,6 @@ test "array of vectors is copied" { test "byte vector initialized in inline function" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From e00f1397e33715512f205eeac240d836b0d1b172 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 2 Dec 2023 13:02:45 -0500 Subject: [PATCH 03/10] x86_64: implement some todos --- src/arch/x86_64/CodeGen.zig | 296 +++++++++++++++++++----------------- test/behavior/bugs/2114.zig | 3 +- test/behavior/cast.zig | 1 - test/behavior/int128.zig | 1 - 4 files changed, 155 insertions(+), 146 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 1e62e8c720..9185f19ca8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3104,18 +3104,17 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const slice_ty = self.typeOfIndex(inst); - const ptr = try self.resolveInst(bin_op.lhs); - const ptr_ty = self.typeOf(bin_op.lhs); - const len = try self.resolveInst(bin_op.rhs); - const len_ty = self.typeOf(bin_op.rhs); - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(slice_ty, mod)); - try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr); + + const ptr_ty = self.typeOf(bin_op.lhs); + try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, .{ .air_ref = bin_op.lhs }); + + const len_ty = self.typeOf(bin_op.rhs); try self.genSetMem( .{ .frame = frame_index }, @intCast(ptr_ty.abiSize(mod)), len_ty, - len, + .{ .air_ref = bin_op.rhs }, ); const result = MCValue{ .load_frame = .{ .index = frame_index } }; @@ -7099,28 +7098,26 @@ fn store(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerErr fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const mod = self.bin_file.options.module.?; - if (safety) { - // TODO if the value is undef, write 0xaa bytes to dest - } else { - // TODO if the value is undef, don't lower this instruction - } - - try self.spillRegisters(&.{ .rdi, .rsi, .rcx }); - const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx }); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const 
ptr_mcv = try self.resolveInst(bin_op.lhs); - const ptr_ty = self.typeOf(bin_op.lhs); - const src_mcv = try self.resolveInst(bin_op.rhs); - const ptr_info = ptr_ty.ptrInfo(mod); - if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) { - try self.packedStore(ptr_ty, ptr_mcv, src_mcv); - } else { - try self.store(ptr_ty, ptr_mcv, src_mcv); + result: { + if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result; + + try self.spillRegisters(&.{ .rdi, .rsi, .rcx }); + const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx }); + defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); + + const src_mcv = try self.resolveInst(bin_op.rhs); + const ptr_mcv = try self.resolveInst(bin_op.lhs); + const ptr_ty = self.typeOf(bin_op.lhs); + + const ptr_info = ptr_ty.ptrInfo(mod); + if (ptr_info.flags.vector_index != .none or ptr_info.packed_offset.host_size > 0) { + try self.packedStore(ptr_ty, ptr_mcv, src_mcv); + } else { + try self.store(ptr_ty, ptr_mcv, src_mcv); + } } - return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -11549,7 +11546,6 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !Mir.Inst.Index { }, else => return self.fail("TODO implement condbr when condition is {s}", .{@tagName(mcv)}), } - return 0; // TODO } fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { @@ -12336,7 +12332,18 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { // for the string, we still use the next u32 for the null terminator. extra_i += clobber.len / 4 + 1; - // TODO honor these + if (std.mem.eql(u8, clobber, "") or std.mem.eql(u8, clobber, "memory")) { + // ok, sure + } else if (std.mem.eql(u8, clobber, "cc") or + std.mem.eql(u8, clobber, "flags") or + std.mem.eql(u8, clobber, "eflags") or + std.mem.eql(u8, clobber, "rflags")) + { + try self.spillEflagsIfOccupied(); + } else { + try self.register_manager.getReg(parseRegName(clobber) orelse + return self.fail("invalid clobber: '{s}'", .{clobber}), null); + } } } @@ -13517,7 +13524,11 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal }; switch (src_mcv) { .none, .unreach, .dead, .reserved_frame => unreachable, - .undef => {}, + .undef => try self.genInlineMemset( + dst_ptr_mcv, + .{ .immediate = 0xaa }, + .{ .immediate = abi_size }, + ), .immediate => |imm| switch (abi_size) { 1, 2, 4 => { const immediate = switch (if (ty.isAbiInt(mod)) @@ -14596,128 +14607,129 @@ fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOr fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const mod = self.bin_file.options.module.?; - if (safety) { - // TODO if the value is undef, write 0xaa bytes to dest - } else { - // TODO if the value is undef, don't lower this instruction - } - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - try self.spillRegisters(&.{ .rdi, .rsi, .rcx }); - const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx }); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); + result: { + if (!safety and (try self.resolveInst(bin_op.rhs)) == .undef) break :result; - const dst_ptr = try self.resolveInst(bin_op.lhs); - const dst_ptr_ty = self.typeOf(bin_op.lhs); - const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock); + try 
self.spillRegisters(&.{ .rax, .rdi, .rsi, .rcx }); + const reg_locks = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdi, .rsi, .rcx }); + defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - const src_val = try self.resolveInst(bin_op.rhs); - const elem_ty = self.typeOf(bin_op.rhs); - const src_val_lock: ?RegisterLock = switch (src_val) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock); - - const elem_abi_size: u31 = @intCast(elem_ty.abiSize(mod)); - - if (elem_abi_size == 1) { - const ptr: MCValue = switch (dst_ptr_ty.ptrSize(mod)) { - // TODO: this only handles slices stored in the stack - .Slice => dst_ptr, - .One => dst_ptr, - .C, .Many => unreachable, - }; - const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) { - // TODO: this only handles slices stored in the stack - .Slice => dst_ptr.address().offset(8).deref(), - .One => .{ .immediate = dst_ptr_ty.childType(mod).arrayLen(mod) }, - .C, .Many => unreachable, - }; - const len_lock: ?RegisterLock = switch (len) { + const dst_ptr = try self.resolveInst(bin_op.lhs); + const dst_ptr_ty = self.typeOf(bin_op.lhs); + const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), else => null, }; - defer if (len_lock) |lock| self.register_manager.unlockReg(lock); + defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock); - try self.genInlineMemset(ptr, src_val, len); - return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); + const src_val = try self.resolveInst(bin_op.rhs); + const elem_ty = self.typeOf(bin_op.rhs); + const src_val_lock: ?RegisterLock = switch (src_val) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock); + + const elem_abi_size: u31 = @intCast(elem_ty.abiSize(mod)); + + if (elem_abi_size == 1) { + const ptr: MCValue = switch (dst_ptr_ty.ptrSize(mod)) { + // TODO: this only handles slices stored in the stack + .Slice => dst_ptr, + .One => dst_ptr, + .C, .Many => unreachable, + }; + const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) { + // TODO: this only handles slices stored in the stack + .Slice => dst_ptr.address().offset(8).deref(), + .One => .{ .immediate = dst_ptr_ty.childType(mod).arrayLen(mod) }, + .C, .Many => unreachable, + }; + const len_lock: ?RegisterLock = switch (len) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (len_lock) |lock| self.register_manager.unlockReg(lock); + + try self.genInlineMemset(ptr, src_val, len); + break :result; + } + + // Store the first element, and then rely on memcpy copying forwards. + // Length zero requires a runtime check - so we handle arrays specially + // here to elide it. + switch (dst_ptr_ty.ptrSize(mod)) { + .Slice => { + const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(mod); + + // TODO: this only handles slices stored in the stack + const ptr = dst_ptr; + const len = dst_ptr.address().offset(8).deref(); + + // Used to store the number of elements for comparison. + // After comparison, updated to store number of bytes needed to copy. 
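+ // A zero length jumps straight past both the first-element store and the memcpy via skip_reloc below.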
+ const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const len_mcv: MCValue = .{ .register = len_reg }; + const len_lock = self.register_manager.lockRegAssumeUnused(len_reg); + defer self.register_manager.unlockReg(len_lock); + + try self.genSetReg(len_reg, Type.usize, len); + try self.asmRegisterRegister(.{ ._, .@"test" }, len_reg, len_reg); + + const skip_reloc = try self.asmJccReloc(.z, undefined); + try self.store(slice_ptr_ty, ptr, src_val); + + const second_elem_ptr_reg = + try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; + const second_elem_ptr_lock = + self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); + defer self.register_manager.unlockReg(second_elem_ptr_lock); + + try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ + .reg = try self.copyToTmpRegister(Type.usize, ptr), + .off = elem_abi_size, + } }); + + try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 }); + try self.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + len_reg, + len_reg, + Immediate.s(elem_abi_size), + ); + try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv); + + try self.performReloc(skip_reloc); + }, + .One => { + const elem_ptr_ty = try mod.singleMutPtrType(elem_ty); + + const len = dst_ptr_ty.childType(mod).arrayLen(mod); + + assert(len != 0); // prevented by Sema + try self.store(elem_ptr_ty, dst_ptr, src_val); + + const second_elem_ptr_reg = + try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; + const second_elem_ptr_lock = + self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); + defer self.register_manager.unlockReg(second_elem_ptr_lock); + + try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ + .reg = try self.copyToTmpRegister(Type.usize, dst_ptr), + .off = elem_abi_size, + } }); + + const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) }; + try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy); + }, + .C, .Many => unreachable, + } } - - // Store the first element, and then rely on memcpy copying forwards. - // Length zero requires a runtime check - so we handle arrays specially - // here to elide it. - switch (dst_ptr_ty.ptrSize(mod)) { - .Slice => { - const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(mod); - - // TODO: this only handles slices stored in the stack - const ptr = dst_ptr; - const len = dst_ptr.address().offset(8).deref(); - - // Used to store the number of elements for comparison. - // After comparison, updated to store number of bytes needed to copy. 
- const len_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const len_mcv: MCValue = .{ .register = len_reg }; - const len_lock = self.register_manager.lockRegAssumeUnused(len_reg); - defer self.register_manager.unlockReg(len_lock); - - try self.genSetReg(len_reg, Type.usize, len); - try self.asmRegisterRegister(.{ ._, .@"test" }, len_reg, len_reg); - - const skip_reloc = try self.asmJccReloc(.z, undefined); - try self.store(slice_ptr_ty, ptr, src_val); - - const second_elem_ptr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; - const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); - defer self.register_manager.unlockReg(second_elem_ptr_lock); - - try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ - .reg = try self.copyToTmpRegister(Type.usize, ptr), - .off = elem_abi_size, - } }); - - try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 }); - try self.asmRegisterRegisterImmediate( - .{ .i_, .mul }, - len_reg, - len_reg, - Immediate.s(elem_abi_size), - ); - try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv); - - try self.performReloc(skip_reloc); - }, - .One => { - const elem_ptr_ty = try mod.singleMutPtrType(elem_ty); - - const len = dst_ptr_ty.childType(mod).arrayLen(mod); - - assert(len != 0); // prevented by Sema - try self.store(elem_ptr_ty, dst_ptr, src_val); - - const second_elem_ptr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; - const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); - defer self.register_manager.unlockReg(second_elem_ptr_lock); - - try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ - .reg = try self.copyToTmpRegister(Type.usize, dst_ptr), - .off = elem_abi_size, - } }); - - const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) }; - try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy); - }, - .C, .Many => unreachable, - } - return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); } diff --git a/test/behavior/bugs/2114.zig b/test/behavior/bugs/2114.zig index c785928024..dfbf58d333 100644 --- a/test/behavior/bugs/2114.zig +++ b/test/behavior/bugs/2114.zig @@ -9,8 +9,7 @@ fn ctz(x: anytype) usize { test "fixed" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64 and - !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .bmi)) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 3bbb1e9a9a..9259e51314 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -336,7 +336,6 @@ test "array coercion to undefined at runtime" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) 
return error.SkipZigTest; @setRuntimeSafety(true); diff --git a/test/behavior/int128.zig b/test/behavior/int128.zig index 7287cd1ab2..6d7b54ea31 100644 --- a/test/behavior/int128.zig +++ b/test/behavior/int128.zig @@ -28,7 +28,6 @@ test "undefined 128 bit int" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; @setRuntimeSafety(true); From 82ba9b8560f6f173953e6d89321235036b98bafe Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 3 Dec 2023 11:35:59 -0500 Subject: [PATCH 04/10] print_air: fix printing of instruction indices --- src/print_air.zig | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/print_air.zig b/src/print_air.zig index 0ae9b37df3..7cc09e9f99 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -97,7 +97,7 @@ const Writer = struct { const tag = w.air.instructions.items(.tag)[@intFromEnum(inst)]; try s.writeByteNTimes(' ', w.indent); try s.print("%{d}{c}= {s}(", .{ - inst, + @intFromEnum(inst), @as(u8, if (if (w.liveness) |liveness| liveness.isUnused(inst) else false) '!' else ' '), @tagName(tag), }); @@ -388,7 +388,7 @@ const Writer = struct { try s.writeAll("}"); for (liveness_block.deaths) |operand| { - try s.print(" %{d}!", .{operand}); + try s.print(" %{d}!", .{@intFromEnum(operand)}); } } @@ -715,7 +715,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (liveness_condbr.else_deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{operand}); + try s.print("%{d}!", .{@intFromEnum(operand)}); } try s.writeAll("\n"); } @@ -726,7 +726,7 @@ const Writer = struct { try s.writeAll("}"); for (liveness_condbr.then_deaths) |operand| { - try s.print(" %{d}!", .{operand}); + try s.print(" %{d}!", .{@intFromEnum(operand)}); } } @@ -752,7 +752,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (liveness_condbr.else_deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{operand}); + try s.print("%{d}!", .{@intFromEnum(operand)}); } try s.writeAll("\n"); } @@ -763,7 +763,7 @@ const Writer = struct { try s.writeAll("}"); for (liveness_condbr.then_deaths) |operand| { - try s.print(" %{d}!", .{operand}); + try s.print(" %{d}!", .{@intFromEnum(operand)}); } } @@ -787,7 +787,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (liveness_condbr.then_deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{operand}); + try s.print("%{d}!", .{@intFromEnum(operand)}); } try s.writeAll("\n"); } @@ -800,7 +800,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (liveness_condbr.else_deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{operand}); + try s.print("%{d}!", .{@intFromEnum(operand)}); } try s.writeAll("\n"); } @@ -852,7 +852,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (deaths, 0..) |operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{operand}); + try s.print("%{d}!", .{@intFromEnum(operand)}); } try s.writeAll("\n"); } @@ -873,7 +873,7 @@ const Writer = struct { try s.writeByteNTimes(' ', w.indent); for (deaths, 0..) 
|operand, i| { if (i != 0) try s.writeAll(" "); - try s.print("%{d}!", .{operand}); + try s.print("%{d}!", .{@intFromEnum(operand)}); } try s.writeAll("\n"); } @@ -957,7 +957,7 @@ const Writer = struct { dies: bool, ) @TypeOf(s).Error!void { _ = w; - try s.print("%{d}", .{inst}); + try s.print("%{d}", .{@intFromEnum(inst)}); if (dies) try s.writeByte('!'); } From 7c85ea65ba9f85be44aa8af3745a6038b132bd7f Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 3 Dec 2023 13:55:16 -0500 Subject: [PATCH 05/10] x86_64: "implement" `aggregate_init` for vectors --- src/arch/x86_64/CodeGen.zig | 30 ++++++++++++++++++++---------- test/behavior/cast.zig | 9 +++------ test/behavior/vector.zig | 2 +- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 9185f19ca8..2eb45203d3 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -8480,7 +8480,8 @@ fn genBinOp( if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null; defer if (mask_lock) |lock| self.register_manager.unlockReg(lock); - const ordered_air = if (lhs_ty.isVector(mod) and switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + const ordered_air: [2]Air.Inst.Ref = if (lhs_ty.isVector(mod) and + switch (lhs_ty.childType(mod).zigTypeTag(mod)) { .Int => switch (air_tag) { .cmp_lt, .cmp_gte => true, else => false, @@ -8490,14 +8491,24 @@ fn genBinOp( else => false, }, else => unreachable, - }) .{ .lhs = rhs_air, .rhs = lhs_air } else .{ .lhs = lhs_air, .rhs = rhs_air }; + }) .{ rhs_air, lhs_air } else .{ lhs_air, rhs_air }; - const lhs_mcv = try self.resolveInst(ordered_air.lhs); - var rhs_mcv = try self.resolveInst(ordered_air.rhs); + if (lhs_ty.isAbiInt(mod)) for (ordered_air) |op_air| { + switch (try self.resolveInst(op_air)) { + .register => |op_reg| switch (op_reg.class()) { + .sse => try self.register_manager.getReg(op_reg, null), + else => {}, + }, + else => {}, + } + }; + + const lhs_mcv = try self.resolveInst(ordered_air[0]); + var rhs_mcv = try self.resolveInst(ordered_air[1]); switch (lhs_mcv) { .immediate => |imm| switch (imm) { 0 => switch (air_tag) { - .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air.rhs), + .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air[1]), else => {}, }, else => {}, @@ -8549,10 +8560,10 @@ fn genBinOp( }; if (maybe_inst) |inst| { if ((!sse_op or lhs_mcv.isRegister()) and - self.reuseOperandAdvanced(inst, ordered_air.lhs, 0, lhs_mcv, tracked_inst)) + self.reuseOperandAdvanced(inst, ordered_air[0], 0, lhs_mcv, tracked_inst)) break :dst lhs_mcv; if (is_commutative and (!sse_op or rhs_mcv.isRegister()) and - self.reuseOperandAdvanced(inst, ordered_air.rhs, 1, rhs_mcv, tracked_inst)) + self.reuseOperandAdvanced(inst, ordered_air[1], 1, rhs_mcv, tracked_inst)) { flipped = true; break :dst rhs_mcv; @@ -8563,7 +8574,7 @@ fn genBinOp( copied_to_dst = false else try self.genCopy(lhs_ty, dst_mcv, lhs_mcv); - rhs_mcv = try self.resolveInst(ordered_air.rhs); + rhs_mcv = try self.resolveInst(ordered_air[1]); break :dst dst_mcv; }; const dst_locks: [2]?RegisterLock = switch (dst_mcv) { @@ -15445,7 +15456,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { } break :result .{ .load_frame = .{ .index = frame_index } }; }, - .Array => { + .Array, .Vector => { const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod)); const elem_ty = result_ty.childType(mod); const elem_size: u32 = @intCast(elem_ty.abiSize(mod)); @@ -15467,7 
+15478,6 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { ); break :result .{ .load_frame = .{ .index = frame_index } }; }, - .Vector => return self.fail("TODO implement aggregate_init for vectors", .{}), else => unreachable, } }; diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 9259e51314..a61c4fb29a 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -1476,7 +1476,7 @@ test "coerce between pointers of compatible differently-named floats" { if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest; - if (builtin.os.tag == .windows) { + if (builtin.zig_backend == .stage2_llvm and builtin.os.tag == .windows) { // https://github.com/ziglang/zig/issues/12396 return error.SkipZigTest; } @@ -1723,7 +1723,6 @@ test "peer type resolution: array and vector with same child type" { test "peer type resolution: array with smaller child type and vector with larger child type" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; // TODO @@ -2311,11 +2310,11 @@ test "cast builtins can wrap result in error union and optional" { test "@floatCast on vector" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest; const S = struct { fn doTheTest() !void { @@ -2332,7 +2331,6 @@ test "@floatCast on vector" { test "@ptrFromInt on vector" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -2382,11 +2380,11 @@ test "@intFromPtr on vector" { test "@floatFromInt on vector" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest; const S = struct { fn doTheTest() !void { @@ -2403,7 +2401,6 @@ test "@floatFromInt on vector" { test "@intFromFloat on vector" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if 
(builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 8cb1a8bb66..667bf3d898 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -222,7 +222,7 @@ test "array to vector with element type coercion" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest; const S = struct { fn doTheTest() !void { From 0be7c23f111eeda5ed65c065ad65a6febd12f20c Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 3 Dec 2023 18:07:49 -0500 Subject: [PATCH 06/10] Coff: minor fixes * Update the msdos stub to be eight bytes smaller, which moves the machine PE header field into the first 128 bytes of the file, allowing it to be matched by a binfmt_misc magic sequence. This allows the build system to get the correct error during exec. * Fix library name memory leaks in Sema. --- src/Sema.zig | 30 +++++++++++++----------------- src/link/Coff.zig | 2 ++ src/link/msdos-stub.bin | Bin 128 -> 120 bytes 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/Sema.zig b/src/Sema.zig index 6d6f8d8fb6..752a5b1023 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -9069,7 +9069,7 @@ fn handleExternLibName( block: *Block, src_loc: LazySrcLoc, lib_name: []const u8, -) CompileError![:0]u8 { +) CompileError!void { blk: { const mod = sema.mod; const comp = mod.comp; @@ -9117,7 +9117,6 @@ fn handleExternLibName( }); }; } - return sema.gpa.dupeZ(u8, lib_name); } /// These are calling conventions that are confirmed to work with variadic functions. 
@@ -9422,15 +9421,13 @@ fn funcCommon( assert(section != .generic); assert(address_space != null); assert(!is_generic); + if (opt_lib_name) |lib_name| try sema.handleExternLibName(block, .{ + .node_offset_lib_name = src_node_offset, + }, lib_name); const func_index = try ip.getExternFunc(gpa, .{ .ty = func_ty, .decl = sema.owner_decl_index, - .lib_name = if (opt_lib_name) |lib_name| (try mod.intern_pool.getOrPutString( - gpa, - try sema.handleExternLibName(block, .{ - .node_offset_lib_name = src_node_offset, - }, lib_name), - )).toOptional() else .none, + .lib_name = try mod.intern_pool.getOrPutStringOpt(gpa, opt_lib_name), }); return finishFunc( sema, @@ -24688,10 +24685,11 @@ fn zirVarExtended( var extra_index: usize = extra.end; - const lib_name: ?[]const u8 = if (small.has_lib_name) blk: { + const lib_name = if (small.has_lib_name) lib_name: { const lib_name = sema.code.nullTerminatedString(sema.code.extra[extra_index]); extra_index += 1; - break :blk lib_name; + try sema.handleExternLibName(block, ty_src, lib_name); + break :lib_name lib_name; } else null; // ZIR supports encoding this information but it is not used; the information @@ -24729,10 +24727,7 @@ fn zirVarExtended( .ty = var_ty.toIntern(), .init = init_val, .decl = sema.owner_decl_index, - .lib_name = if (lib_name) |lname| (try mod.intern_pool.getOrPutString( - sema.gpa, - try sema.handleExternLibName(block, ty_src, lname), - )).toOptional() else .none, + .lib_name = try mod.intern_pool.getOrPutStringOpt(sema.gpa, lib_name), .is_extern = small.is_extern, .is_threadlocal = small.is_threadlocal, } }))); @@ -25177,12 +25172,13 @@ fn resolveExternOptions( .needed_comptime_reason = "threadlocality of the extern symbol must be comptime-known", }); - const library_name = if (library_name_val.optionalValue(mod)) |payload| blk: { - const library_name = try payload.toAllocatedBytes(Type.slice_const_u8, sema.arena, mod); + const library_name = if (library_name_val.optionalValue(mod)) |library_name_payload| library_name: { + const library_name = try library_name_payload.toAllocatedBytes(Type.slice_const_u8, sema.arena, mod); if (library_name.len == 0) { return sema.fail(block, library_src, "library name cannot be empty", .{}); } - break :blk try sema.handleExternLibName(block, library_src, library_name); + try sema.handleExternLibName(block, library_src, library_name); + break :library_name library_name; } else null; if (name.len == 0) { diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 44b329d5b9..5fbf02871a 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -316,6 +316,8 @@ pub fn deinit(self: *Coff) void { } self.import_tables.deinit(gpa); + self.lazy_syms.deinit(gpa); + for (self.decls.values()) |*metadata| { metadata.deinit(gpa); } diff --git a/src/link/msdos-stub.bin b/src/link/msdos-stub.bin index 96ad91198f0de1eb25b9d9846c44706823dffa58..8993ab1544ac59a02f2f7013c7c30d9a0cc071e0 100644 GIT binary patch delta 33 jcmZo*tPu2#s$gJbU|?VYVlbE}sLfb0(bj@lg@FM8RNw@n delta 41 rcmb Date: Sun, 3 Dec 2023 23:07:03 -0500 Subject: [PATCH 07/10] x86_64: implement movement for pointer vectors --- src/arch/x86_64/CodeGen.zig | 18 ++++++++++++++++++ test/behavior/cast.zig | 1 - 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 2eb45203d3..0e960cebeb 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -13046,6 +13046,24 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo }, else => {}, }, + .Pointer, 
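+ // Runtime pointer elements are word-sized, so these vectors move like vectors of usize.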
.Optional => if (ty.childType(mod).isPtrAtRuntime(mod)) + switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 3...4 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) + .{ .v_, .movdqa } + else + .{ .v_, .movdqu } }, + else => {}, + } + else + unreachable, .Float => switch (ty.childType(mod).floatBits(self.target.*)) { 16 => switch (ty.vectorLen(mod)) { 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index a61c4fb29a..1095f1c7eb 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -2355,7 +2355,6 @@ test "@ptrFromInt on vector" { test "@intFromPtr on vector" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 50993a8f08595b690e0b566cea3266c5ce2c5131 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 4 Dec 2023 01:27:13 -0500 Subject: [PATCH 08/10] x86_64: implement more operations on vectors with 1-bit elements --- src/arch/x86_64/CodeGen.zig | 76 +++++++++++++++++++-------- src/codegen.zig | 100 ++++++++++++++++++++---------------- test/behavior/cast.zig | 1 - 3 files changed, 111 insertions(+), 66 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 0e960cebeb..e727596ed5 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2478,8 +2478,11 @@ fn regClassForType(self: *Self, ty: Type) RegisterManager.RegisterBitSet { else => abi.RegisterClass.sse, }, .Vector => switch (ty.childType(mod).toIntern()) { - .bool_type => abi.RegisterClass.gp, - else => abi.RegisterClass.sse, + .bool_type, .u1_type => abi.RegisterClass.gp, + else => if (ty.isAbiInt(mod) and ty.intInfo(mod).bits == 1) + abi.RegisterClass.gp + else + abi.RegisterClass.sse, }, else => abi.RegisterClass.gp, }; @@ -5152,7 +5155,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { defer if (index_lock) |lock| self.register_manager.unlockReg(lock); try self.spillEflagsIfOccupied(); - if (array_ty.isVector(mod) and elem_ty.toIntern() == .bool_type) { + if (array_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) { const index_reg = switch (index_mcv) { .register => |reg| reg, else => try self.copyToTmpRegister(index_ty, index_mcv), @@ -15475,26 +15478,59 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { break :result .{ .load_frame = .{ .index = frame_index } }; }, .Array, .Vector => { - const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod)); const elem_ty = result_ty.childType(mod); - const elem_size: u32 = @intCast(elem_ty.abiSize(mod)); + if (result_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) { + const result_size: u32 = @intCast(result_ty.abiSize(mod)); + const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); + try self.asmRegisterRegister( + .{ ._, .xor }, + registerAlias(dst_reg, @min(result_size, 4)), + registerAlias(dst_reg, @min(result_size, 4)), + ); - for (elements, 0..) 
-                    const elem_mcv = try self.resolveInst(elem);
-                    const mat_elem_mcv = switch (elem_mcv) {
-                        .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
-                        else => elem_mcv,
-                    };
-                    const elem_off: i32 = @intCast(elem_size * elem_i);
-                    try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
+                    for (elements, 0..) |elem, elem_i| {
+                        const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem });
+                        const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
+                        defer self.register_manager.unlockReg(elem_lock);
+
+                        try self.asmRegisterImmediate(
+                            .{ ._, .@"and" },
+                            registerAlias(elem_reg, @min(result_size, 4)),
+                            Immediate.u(1),
+                        );
+                        if (elem_i > 0) try self.asmRegisterImmediate(
+                            .{ ._l, .sh },
+                            registerAlias(elem_reg, result_size),
+                            Immediate.u(@intCast(elem_i)),
+                        );
+                        try self.asmRegisterRegister(
+                            .{ ._, .@"or" },
+                            registerAlias(dst_reg, result_size),
+                            registerAlias(elem_reg, result_size),
+                        );
+                    }
+                    break :result .{ .register = dst_reg };
+                } else {
+                    const frame_index = try self.allocFrameIndex(FrameAlloc.initSpill(result_ty, mod));
+                    const elem_size: u32 = @intCast(elem_ty.abiSize(mod));
+
+                    for (elements, 0..) |elem, elem_i| {
+                        const elem_mcv = try self.resolveInst(elem);
+                        const mat_elem_mcv = switch (elem_mcv) {
+                            .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
+                            else => elem_mcv,
+                        };
+                        const elem_off: i32 = @intCast(elem_size * elem_i);
+                        try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
+                    }
+                    if (result_ty.sentinel(mod)) |sentinel| try self.genSetMem(
+                        .{ .frame = frame_index },
+                        @intCast(elem_size * elements.len),
+                        elem_ty,
+                        try self.genTypedValue(.{ .ty = elem_ty, .val = sentinel }),
+                    );
+                    break :result .{ .load_frame = .{ .index = frame_index } };
                 }
-                if (result_ty.sentinel(mod)) |sentinel| try self.genSetMem(
-                    .{ .frame = frame_index },
-                    @intCast(elem_size * elements.len),
-                    elem_ty,
-                    try self.genTypedValue(.{ .ty = elem_ty, .val = sentinel }),
-                );
-                break :result .{ .load_frame = .{ .index = frame_index } };
             },
             else => unreachable,
         }
diff --git a/src/codegen.zig b/src/codegen.zig
index f1758f978d..b464a9f365 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -391,53 +391,63 @@ pub fn generateSymbol(
         .vector_type => |vector_type| {
             const abi_size = math.cast(usize, typed_value.ty.abiSize(mod)) orelse
                 return error.Overflow;
-            switch (vector_type.child) {
-                .bool_type => {
-                    const bytes = try code.addManyAsSlice(abi_size);
-                    @memset(bytes, 0xaa);
-                    var index: usize = 0;
-                    const len = math.cast(usize, vector_type.len) orelse return error.Overflow;
-                    while (index < len) : (index += 1) {
-                        const bit_index = switch (endian) {
-                            .big => len - 1 - index,
-                            .little => index,
-                        };
-                        const byte = &bytes[bit_index / 8];
-                        const mask = @as(u8, 1) << @truncate(bit_index);
-                        if (switch (switch (aggregate.storage) {
-                            .bytes => unreachable,
-                            .elems => |elems| elems[index],
-                            .repeated_elem => |elem| elem,
-                        }) {
-                            .bool_true => true,
-                            .bool_false => false,
-                            else => |elem| {
-                                assert(mod.intern_pool.indexToKey(elem).undef == .bool_type);
-                                continue;
-                            },
-                        }) byte.* |= mask else byte.* &= ~mask;
-                    }
-                },
-                else => switch (aggregate.storage) {
+            if (Type.fromInterned(vector_type.child).bitSize(mod) == 1) {
+                const bytes = try code.addManyAsSlice(abi_size);
+                @memset(bytes, 0xaa);
+                var index: usize = 0;
+                const len = math.cast(usize, vector_type.len) orelse return error.Overflow;
+                while (index < len) : (index += 1) {
+                    const bit_index = switch (endian) {
+                        .big => len - 1 - index,
+                        .little => index,
+                    };
+                    const byte = &bytes[bit_index / 8];
+                    const mask = @as(u8, 1) << @truncate(bit_index);
+                    if (switch (switch (aggregate.storage) {
+                        .bytes => unreachable,
+                        .elems => |elems| elems[index],
+                        .repeated_elem => |elem| elem,
+                    }) {
+                        .bool_true => true,
+                        .bool_false => false,
+                        else => |elem| switch (mod.intern_pool.indexToKey(elem)) {
+                            .undef => continue,
+                            .int => |int| switch (int.storage) {
+                                .u64 => |x| switch (x) {
+                                    0 => false,
+                                    1 => true,
+                                    else => unreachable,
+                                },
+                                .i64 => |x| switch (x) {
+                                    -1 => true,
+                                    0 => false,
+                                    else => unreachable,
+                                },
+                                else => unreachable,
+                            },
+                            else => unreachable,
+                        },
+                    }) byte.* |= mask else byte.* &= ~mask;
+                }
+            } else switch (aggregate.storage) {
+                .bytes => |bytes| try code.appendSlice(bytes),
+                .elems, .repeated_elem => {
+                    var index: u64 = 0;
+                    while (index < vector_type.len) : (index += 1) {
+                        switch (try generateSymbol(bin_file, src_loc, .{
+                            .ty = Type.fromInterned(vector_type.child),
+                            .val = Value.fromInterned(switch (aggregate.storage) {
+                                .bytes => unreachable,
+                                .elems => |elems| elems[
+                                    math.cast(usize, index) orelse return error.Overflow
+                                ],
+                                .repeated_elem => |elem| elem,
+                            }),
+                        }, code, debug_output, reloc_info)) {
+                            .ok => {},
+                            .fail => |em| return .{ .fail = em },
+                        }
+                    }
+                },
+            }
         },
diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig
index 1095f1c7eb..f0a1f60235 100644
--- a/test/behavior/cast.zig
+++ b/test/behavior/cast.zig
@@ -2420,7 +2420,6 @@ test "@intFromFloat on vector" {
 
 test "@intFromBool on vector" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO

From bdb6546a8f753fff65790fd289e35b1d5ba6cd5b Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Mon, 4 Dec 2023 13:07:33 -0500
Subject: [PATCH 09/10] x86_64: fix vector comparisons

---
 src/arch/x86_64/CodeGen.zig | 25 +++++++++++++++++++------
 test/behavior/math.zig      |  6 +++---
 test/behavior/vector.zig    |  1 -
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index e727596ed5..e1cc9470cb 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -9977,12 +9977,25 @@ fn genBinOp(
         defer self.register_manager.unlockReg(gp_lock);
 
         try self.asmRegisterRegister(switch (mir_tag[0]) {
-            ._pd, ._sd => .{ ._pd, .movmsk },
-            ._ps, ._ss => .{ ._ps, .movmsk },
-            .p_b, .p_d, .p_q, .p_w => .{ .p_b, .movmsk },
-            .v_pd, .v_sd => .{ .v_pd, .movmsk },
-            .v_ps, .v_ss => .{ .v_ps, .movmsk },
-            .vp_b, .vp_d, .vp_q, .vp_w => .{ .vp_b, .movmsk },
+            ._pd, ._sd, .p_q => .{ ._pd, .movmsk },
+            ._ps, ._ss, .p_d => .{ ._ps, .movmsk },
+            .p_b => .{ .p_b, .movmsk },
+            .p_w => movmsk: {
+                try self.asmRegisterRegister(.{ .p_b, .ackssw }, dst_reg, dst_reg);
+                break :movmsk .{ .p_b, .movmsk };
+            },
+            .v_pd, .v_sd, .vp_q => .{ .v_pd, .movmsk },
+            .v_ps, .v_ss, .vp_d => .{ .v_ps, .movmsk },
+            .vp_b => .{ .vp_b, .movmsk },
+            .vp_w => movmsk: {
+                try self.asmRegisterRegisterRegister(
+                    .{ .vp_b, .ackssw },
+                    dst_reg,
+                    dst_reg,
+                    dst_reg,
+                );
+                break :movmsk .{ .vp_b, .movmsk };
+            },
             else => unreachable,
         }, gp_reg.to32(), dst_reg);
         return .{ .register = gp_reg };
diff --git a/test/behavior/math.zig b/test/behavior/math.zig
index ab54c9e4a9..3d3c282854 100644
--- a/test/behavior/math.zig
+++ b/test/behavior/math.zig
@@ -78,12 +78,11 @@ fn testClz() !void {
 }
 
 test "@clz big ints" {
-    if (builtin.zig_backend == .stage2_x86_64 and
-        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .lzcnt)) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
 
     try testClzBigInts();
     try comptime testClzBigInts();
@@ -1610,8 +1609,9 @@ test "vector comparison" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64 and
+        !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .avx2)) return error.SkipZigTest;
 
     const S = struct {
         fn doTheTest() !void {
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig
index 667bf3d898..4ae7e76c74 100644
--- a/test/behavior/vector.zig
+++ b/test/behavior/vector.zig
@@ -49,7 +49,6 @@ test "vector wrap operators" {
 
 test "vector bin compares with mem.eql" {
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO

From c70c33359498bfe33a2f60cfcc5ea401a277d5bf Mon Sep 17 00:00:00 2001
From: Jacob Young
Date: Mon, 4 Dec 2023 13:31:34 -0500
Subject: [PATCH 10/10] x86_64: fix packed struct field reuse

---
 src/arch/x86_64/CodeGen.zig     | 8 +++++---
 test/behavior/packed-struct.zig | 1 -
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index e1cc9470cb..0a4c9844dc 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -7324,8 +7324,8 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
         .load_frame => |frame_addr| {
             const field_abi_size: u32 = @intCast(field_ty.abiSize(mod));
             if (field_off % 8 == 0) {
-                const off_mcv =
-                    src_mcv.address().offset(@intCast(@divExact(field_off, 8))).deref();
+                const field_byte_off = @divExact(field_off, 8);
+                const off_mcv = src_mcv.address().offset(@intCast(field_byte_off)).deref();
                 const field_bit_size = field_ty.bitSize(mod);
 
                 if (field_abi_size <= 8) {
@@ -7350,7 +7350,9 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                         try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv);
                 }
 
-                const dst_mcv = if (self.reuseOperand(inst, operand, 0, src_mcv))
+                const container_abi_size: u32 = @intCast(container_ty.abiSize(mod));
+                const dst_mcv = if (field_byte_off + field_abi_size <= container_abi_size and
+                    self.reuseOperand(inst, operand, 0, src_mcv))
                     off_mcv
                 else dst: {
                     const dst_mcv = try self.allocRegOrMem(inst, true);
diff --git a/test/behavior/packed-struct.zig b/test/behavior/packed-struct.zig
index adbb0b977a..f4aceaa82d 100644
--- a/test/behavior/packed-struct.zig
+++ b/test/behavior/packed-struct.zig
@@ -804,7 +804,6 @@ test "nested packed struct at non-zero offset" {
 }
 
 test "nested packed struct at non-zero offset 2" {
-    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
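
Note on the Sema changes at the top of this range: handleExternLibName is now
called for its side effect of recording the library dependency, and the name is
interned separately via getOrPutStringOpt, instead of interning the string that
handleExternLibName returns. What this touches is any extern declaration that
names a library. A minimal sketch, not taken from the series, assuming a
libc-linked build (e.g. zig test -lc); strlen is the usual libc symbol:

const std = @import("std");

// `extern "c"` attaches the library name "c" to the declaration; Sema records
// the library for the linker and interns the name into the extern-fn key.
extern "c" fn strlen(s: [*:0]const u8) usize;

// The builtin form goes through resolveExternOptions, where an empty
// .library_name is rejected with "library name cannot be empty".
const c_strlen = @extern(*const fn ([*:0]const u8) callconv(.C) usize, .{
    .name = "strlen",
    .library_name = "c",
});

test "extern declarations with an explicit library name" {
    try std.testing.expectEqual(@as(usize, 5), strlen("hello"));
    try std.testing.expectEqual(@as(usize, 5), c_strlen("hello"));
}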
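The moveStrategy addition in PATCH 07 is what the re-enabled "@intFromPtr on
vector" test exercises: vectors whose elements are pointers (or optional
pointers) now get SSE move instructions (movq for a single element,
movdqa/movdqu or their AVX forms for wider vectors). A sketch of the kind of
code this unblocks, not taken from the test suite:

const std = @import("std");

test "@intFromPtr applies elementwise to a vector of pointers" {
    const x: u32 = 42;
    const y: u32 = 7;
    // Two 64-bit pointers form a 16-byte vector, so the backend moves it
    // with movdqa/movdqu (vmovdqa/vmovdqu when AVX is available).
    const ptrs: @Vector(2, *const u32) = .{ &x, &y };
    const addrs: @Vector(2, usize) = @intFromPtr(ptrs);
    try std.testing.expectEqual(@intFromPtr(&x), addrs[0]);
    try std.testing.expectEqual(@intFromPtr(&y), addrs[1]);
}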
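PATCH 08 lowers runtime aggregate initialization of vectors with 1-bit elements
(bool or u1) into general-purpose-register bit twiddling: the destination is
zeroed with xor, then each element is masked with `and`, shifted into its lane
with shl, and merged with `or`. A scalar model of that lowering, written here
only for illustration:

const std = @import("std");

// Mirrors the emitted sequence: dst = 0; then for each lane i:
// elem &= 1; elem <<= i; dst |= elem.
fn packBits(elems: []const u1) u64 {
    var dst: u64 = 0;
    for (elems, 0..) |elem, i| {
        dst |= @as(u64, elem & 1) << @intCast(i);
    }
    return dst;
}

test "1-bit elements pack little-endian, lane 0 in bit 0" {
    // Same bit order generateSymbol uses for the .little endian case.
    try std.testing.expectEqual(@as(u64, 0b1011), packBits(&.{ 1, 1, 0, 1 }));
}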
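The genBinOp fix in PATCH 09 is about extracting a comparison mask: pmovmskb
collects the sign bit of each *byte*, so for 16-bit lanes the backend first
narrows the lanewise compare result with packsswb (spelled .{ .p_b, .ackssw }
in Mir), while 32/64-bit lanes reuse the movmskps/movmskpd forms. At the
language level this is an ordinary vector compare; a sketch, not from the
test suite:

const std = @import("std");

test "comparison of i16 vectors produces a bool vector" {
    var a: @Vector(4, i16) = .{ 1, -2, 3, -4 };
    var b: @Vector(4, i16) = .{ 1, 2, 3, -5 };
    _ = .{ &a, &b }; // keep the operands runtime-known
    const eq: [4]bool = a == b; // lanewise compare, then packsswb + pmovmskb
    try std.testing.expect(std.mem.eql(bool, &eq, &.{ true, false, true, false }));
}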
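PATCH 10 guards operand reuse in airStructFieldVal: a byte-aligned field load
may only alias the source frame slot when field_byte_off + field_abi_size still
fits inside the container, otherwise the load could read past it. For example,
a u24-backed inner struct at byte offset 1 of a 4-byte container has ABI size 4,
and 1 + 4 > 4, so the backend must copy instead of reusing. A sketch of that
shape; the struct names are made up for illustration:

const std = @import("std");

const Inner = packed struct { x: u16, y: u8 }; // u24 backing, ABI size 4
const Outer = packed struct { pad: u8, inner: Inner }; // u32 backing, ABI size 4

test "packed field whose ABI size overhangs its container" {
    var v: Outer = .{ .pad = 0xff, .inner = .{ .x = 0x1234, .y = 0x56 } };
    _ = &v;
    // inner sits at bit offset 8 (byte offset 1); loading it as a 4-byte
    // value may not reuse the 4-byte frame slot, so a fresh copy is made.
    try std.testing.expectEqual(@as(u16, 0x1234), v.inner.x);
    try std.testing.expectEqual(@as(u8, 0x56), v.inner.y);
}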