diff --git a/lib/std/crypto/blake3.zig b/lib/std/crypto/blake3.zig index d87211fb1e..08c79893d1 100644 --- a/lib/std/crypto/blake3.zig +++ b/lib/std/crypto/blake3.zig @@ -200,7 +200,8 @@ const CompressGeneric = struct { } }; -const compress = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64) +const compress = if (builtin.cpu.arch == .x86_64 and + (builtin.zig_backend != .stage2_x86_64 or std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3))) CompressVectorized.compress else CompressGeneric.compress; diff --git a/lib/std/crypto/salsa20.zig b/lib/std/crypto/salsa20.zig index 7f4c1b0157..7342d22c3c 100644 --- a/lib/std/crypto/salsa20.zig +++ b/lib/std/crypto/salsa20.zig @@ -302,7 +302,11 @@ fn SalsaNonVecImpl(comptime rounds: comptime_int) type { }; } -const SalsaImpl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64) SalsaVecImpl else SalsaNonVecImpl; +const SalsaImpl = if (builtin.cpu.arch == .x86_64 and + (builtin.zig_backend != .stage2_x86_64 or std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3))) + SalsaVecImpl +else + SalsaNonVecImpl; fn keyToWords(key: [32]u8) [8]u32 { var k: [8]u32 = undefined; diff --git a/lib/std/meta.zig b/lib/std/meta.zig index e7dd4e5652..17df0650f3 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -1286,5 +1286,6 @@ test "hasUniqueRepresentation" { try testing.expect(!hasUniqueRepresentation([]u8)); try testing.expect(!hasUniqueRepresentation([]const u8)); - try testing.expect(hasUniqueRepresentation(@Vector(4, u16))); + try testing.expect(hasUniqueRepresentation(@Vector(std.simd.suggestVectorLength(u8) orelse 1, u8))); + try testing.expect(@sizeOf(@Vector(3, u8)) == 3 or !hasUniqueRepresentation(@Vector(3, u8))); } diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index 224b0b3801..e74480784e 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -239,18 +239,19 @@ pub fn utf8ValidateSlice(input: []const u8) bool { fn utf8ValidateSliceImpl(input: []const u8, comptime surrogates: Surrogates) bool { var remaining = input; - const chunk_len = std.simd.suggestVectorLength(u8) orelse 1; - const Chunk = @Vector(chunk_len, u8); + if (std.simd.suggestVectorLength(u8)) |chunk_len| { + const Chunk = @Vector(chunk_len, u8); - // Fast path. Check for and skip ASCII characters at the start of the input. - while (remaining.len >= chunk_len) { - const chunk: Chunk = remaining[0..chunk_len].*; - const mask: Chunk = @splat(0x80); - if (@reduce(.Or, chunk & mask == mask)) { - // found a non ASCII byte - break; + // Fast path. Check for and skip ASCII characters at the start of the input. + while (remaining.len >= chunk_len) { + const chunk: Chunk = remaining[0..chunk_len].*; + const mask: Chunk = @splat(0x80); + if (@reduce(.Or, chunk & mask == mask)) { + // found a non ASCII byte + break; + } + remaining = remaining[chunk_len..]; } - remaining = remaining[chunk_len..]; } // default lowest and highest continuation byte @@ -937,8 +938,11 @@ fn utf16LeToUtf8ArrayListImpl( try array_list.ensureTotalCapacityPrecise(utf16le.len); var remaining = utf16le; - if (builtin.zig_backend != .stage2_x86_64) { - const chunk_len = std.simd.suggestVectorLength(u16) orelse 1; + if (builtin.zig_backend != .stage2_x86_64 or + comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and + !std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) + vectorized: { + const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u16); // Fast path. Check for and encode ASCII characters at the start of the input. @@ -1029,8 +1033,11 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr var end_index: usize = 0; var remaining = utf16le; - if (builtin.zig_backend != .stage2_x86_64) { - const chunk_len = std.simd.suggestVectorLength(u16) orelse 1; + if (builtin.zig_backend != .stage2_x86_64 or + comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and + !std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) + vectorized: { + const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u16); // Fast path. Check for and encode ASCII characters at the start of the input. @@ -1155,8 +1162,12 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, var remaining = utf8; // Need support for std.simd.interlace - if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) { - const chunk_len = std.simd.suggestVectorLength(u8) orelse 1; + if ((builtin.zig_backend != .stage2_x86_64 or + comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and + !std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) and + comptime !builtin.cpu.arch.isMIPS()) + vectorized: { + const chunk_len = @divExact(std.simd.suggestVectorLength(u8) orelse break :vectorized, 2); const Chunk = @Vector(chunk_len, u8); // Fast path. Check for and encode ASCII characters at the start of the input. @@ -1232,8 +1243,12 @@ pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: var remaining = utf8; // Need support for std.simd.interlace - if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) { - const chunk_len = std.simd.suggestVectorLength(u8) orelse 1; + if ((builtin.zig_backend != .stage2_x86_64 or + comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and + !std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) and + comptime !builtin.cpu.arch.isMIPS()) + vectorized: { + const chunk_len = @divExact(std.simd.suggestVectorLength(u8) orelse break :vectorized, 2); const Chunk = @Vector(chunk_len, u8); // Fast path. Check for and encode ASCII characters at the start of the input. diff --git a/lib/std/zig/c_translation.zig b/lib/std/zig/c_translation.zig index dfa888e94b..337149e97d 100644 --- a/lib/std/zig/c_translation.zig +++ b/lib/std/zig/c_translation.zig @@ -308,14 +308,12 @@ test "promoteIntLiteral" { /// Convert from clang __builtin_shufflevector index to Zig @shuffle index /// clang requires __builtin_shufflevector index arguments to be integer constants. -/// negative values for `this_index` indicate "don't care" so we arbitrarily choose 0 +/// negative values for `this_index` indicate "don't care". /// clang enforces that `this_index` is less than the total number of vector elements /// See https://ziglang.org/documentation/master/#shuffle /// See https://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len: usize) i32 { - if (this_index <= 0) return 0; - - const positive_index = @as(usize, @intCast(this_index)); + const positive_index = std.math.cast(usize, this_index) orelse return undefined; if (positive_index < source_vector_len) return @as(i32, @intCast(this_index)); const b_index = positive_index - source_vector_len; return ~@as(i32, @intCast(b_index)); @@ -324,7 +322,7 @@ pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len test "shuffleVectorIndex" { const vector_len: usize = 4; - try testing.expect(shuffleVectorIndex(-1, vector_len) == 0); + _ = shuffleVectorIndex(-1, vector_len); try testing.expect(shuffleVectorIndex(0, vector_len) == 0); try testing.expect(shuffleVectorIndex(1, vector_len) == 1); diff --git a/src/InternPool.zig b/src/InternPool.zig index 19be12c129..af1eb241a5 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -3587,6 +3587,7 @@ pub const Alignment = enum(u6) { @"8" = 3, @"16" = 4, @"32" = 5, + @"64" = 6, none = std.math.maxInt(u6), _, diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index f7afeed147..a69269239f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2610,7 +2610,8 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).Array.len]RegisterLock; var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = - if (opts.update_tracking) ({}) else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); + if (opts.update_tracking) + {} else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); var reg_locks = if (opts.update_tracking) {} else try std.ArrayList(RegisterLock).initCapacity( stack.get(), @@ -14116,30 +14117,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo else => {}, }, .Int => switch (ty.childType(mod).intInfo(mod).bits) { - 8 => switch (ty.vectorLen(mod)) { - 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{ - .insert = .{ .vp_b, .insr }, - .extract = .{ .vp_b, .extr }, - } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{ - .insert = .{ .p_b, .insr }, - .extract = .{ .p_b, .extr }, - } }, - 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ - .insert = .{ .vp_w, .insr }, - .extract = .{ .vp_w, .extr }, - } } else .{ .insert_extract = .{ - .insert = .{ .p_w, .insr }, - .extract = .{ .p_w, .extr }, - } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } - else - .{ ._d, .mov } }, - 5...8 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 9...16 => return .{ .move = if (self.hasFeature(.avx)) + 1...8 => switch (ty.vectorLen(mod)) { + 1...16 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 17...32 => if (self.hasFeature(.avx)) @@ -14149,23 +14128,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 16 => switch (ty.vectorLen(mod)) { - 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ - .insert = .{ .vp_w, .insr }, - .extract = .{ .vp_w, .extr }, - } } else .{ .insert_extract = .{ - .insert = .{ .p_w, .insr }, - .extract = .{ .p_w, .extr }, - } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } - else - .{ ._d, .mov } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 5...8 => return .{ .move = if (self.hasFeature(.avx)) + 9...16 => switch (ty.vectorLen(mod)) { + 1...8 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 9...16 => if (self.hasFeature(.avx)) @@ -14175,16 +14139,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 32 => switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } - else - .{ ._d, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) + 17...32 => switch (ty.vectorLen(mod)) { + 1...4 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 5...8 => if (self.hasFeature(.avx)) @@ -14194,12 +14150,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 64 => switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) + 33...64 => switch (ty.vectorLen(mod)) { + 1...2 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 3...4 => if (self.hasFeature(.avx)) @@ -14209,7 +14161,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 128 => switch (ty.vectorLen(mod)) { + 65...128 => switch (ty.vectorLen(mod)) { 1 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, @@ -14220,7 +14172,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 256 => switch (ty.vectorLen(mod)) { + 129...256 => switch (ty.vectorLen(mod)) { 1 => if (self.hasFeature(.avx)) return .{ .move = if (aligned) .{ .v_, .movdqa } @@ -14232,11 +14184,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo }, .Pointer, .Optional => if (ty.childType(mod).isPtrAtRuntime(mod)) switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) + 1...2 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 3...4 => if (self.hasFeature(.avx)) @@ -14250,22 +14198,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo unreachable, .Float => switch (ty.childType(mod).floatBits(self.target.*)) { 16 => switch (ty.vectorLen(mod)) { - 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ - .insert = .{ .vp_w, .insr }, - .extract = .{ .vp_w, .extr }, - } } else .{ .insert_extract = .{ - .insert = .{ .p_w, .insr }, - .extract = .{ .p_w, .extr }, - } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } - else - .{ ._d, .mov } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 5...8 => return .{ .move = if (self.hasFeature(.avx)) + 1...8 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 9...16 => if (self.hasFeature(.avx)) @@ -14276,15 +14209,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo else => {}, }, 32 => switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_ss, .mov } - else - .{ ._ss, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } - else - .{ ._sd, .mov } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) + 1...4 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, 5...8 => if (self.hasFeature(.avx)) @@ -14295,11 +14220,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo else => {}, }, 64 => switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } - else - .{ ._sd, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) + 1...2 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } }, 3...4 => if (self.hasFeature(.avx)) @@ -16551,7 +16472,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { const vec_len = ty.vectorLen(mod); const elem_ty = ty.childType(mod); const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod)); - const abi_size = elem_abi_size * vec_len; + const abi_size: u32 = @intCast(ty.abiSize(mod)); const pred_ty = self.typeOf(pl_op.operand); const result = result: { @@ -16882,10 +16803,283 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { } fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.comp.module.?; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - _ = ty_pl; - return self.fail("TODO implement airShuffle for x86_64", .{}); - //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data; + + const dst_ty = self.typeOfIndex(inst); + const elem_ty = dst_ty.childType(mod); + const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod)); + const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod)); + const lhs_ty = self.typeOf(extra.a); + const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(mod)); + const rhs_ty = self.typeOf(extra.b); + const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(mod)); + const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size); + + const ExpectedContents = [32]?i32; + var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = + std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); + const allocator = stack.get(); + + const mask_elems = try allocator.alloc(?i32, extra.mask_len); + defer allocator.free(mask_elems); + for (mask_elems, 0..) |*mask_elem, elem_index| { + const mask_elem_val = Value + .fromInterned(extra.mask).elemValue(mod, elem_index) catch unreachable; + mask_elem.* = if (mask_elem_val.isUndef(mod)) + null + else + @intCast(mask_elem_val.toSignedInt(mod)); + } + + const result = @as(?MCValue, result: { + for (mask_elems) |mask_elem| { + if (mask_elem) |_| break; + } else break :result try self.allocRegOrMem(inst, true); + + for (mask_elems, 0..) |mask_elem, elem_index| { + if (mask_elem orelse continue != @as(i32, @intCast(elem_index))) break; + } else { + const lhs_mcv = try self.resolveInst(extra.a); + if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv; + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{}); + break :result dst_mcv; + } + + for (mask_elems, 0..) |mask_elem, elem_index| { + if (mask_elem orelse continue != ~@as(i32, @intCast(elem_index))) break; + } else { + const rhs_mcv = try self.resolveInst(extra.b); + if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv; + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{}); + break :result dst_mcv; + } + + const has_avx = self.hasFeature(.avx); + shufpd: { + if (elem_abi_size != 8) break :shufpd; + if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufpd; + + var control: u4 = 0b0_0_0_0; + var sources = [1]?u1{null} ** 2; + for (mask_elems, 0..) |maybe_mask_elem, elem_index| { + const mask_elem = maybe_mask_elem orelse continue; + const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); + if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd; + + const source = @intFromBool(mask_elem < 0); + if (sources[elem_index & 0b01]) |prev_source| { + if (source != prev_source) break :shufpd; + } else sources[elem_index & 0b01] = source; + + control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index); + } + if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd; + + const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; + const operand_tys = [2]Type{ lhs_ty, rhs_ty }; + const lhs_mcv = try self.resolveInst(operands[sources[0].?]); + const rhs_mcv = try self.resolveInst(operands[sources[1].?]); + + const dst_mcv: MCValue = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) + lhs_mcv + else if (has_avx and lhs_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, max_abi_size); + + if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .v_pd, .shuf }, + dst_alias, + registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + Immediate.u(control), + ) else try self.asmRegisterRegisterRegisterImmediate( + .{ .v_pd, .shuf }, + dst_alias, + registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), + Immediate.u(control), + ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + .{ ._pd, .shuf }, + dst_alias, + try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + Immediate.u(control), + ) else try self.asmRegisterRegisterImmediate( + .{ ._pd, .shuf }, + dst_alias, + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), + Immediate.u(control), + ); + break :result dst_mcv; + } + + pshufb: { + if (max_abi_size > 16) break :pshufb; + if (!self.hasFeature(.ssse3)) break :pshufb; + + const temp_regs = + try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse); + const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs); + defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); + + const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size); + try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{}); + + const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size); + try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{}); + + var lhs_mask_elems: [16]InternPool.Index = undefined; + for (lhs_mask_elems[0..max_abi_size], 0..) |*lhs_mask_elem, byte_index| { + const elem_index = byte_index / elem_abi_size; + lhs_mask_elem.* = try mod.intern(.{ .int = .{ + .ty = .u8_type, + .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: { + const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000; + if (mask_elem < 0) break :elem 0b1_00_00000; + const mask_elem_index: u31 = @intCast(mask_elem); + const byte_off: u32 = @intCast(byte_index % elem_abi_size); + break :elem @intCast(mask_elem_index * elem_abi_size + byte_off); + } }, + } }); + } + const lhs_mask_ty = try mod.vectorType(.{ .len = max_abi_size, .child = .u8_type }); + const lhs_mask_mcv = try self.genTypedValue(.{ + .ty = lhs_mask_ty, + .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{ + .ty = lhs_mask_ty.toIntern(), + .storage = .{ .elems = lhs_mask_elems[0..max_abi_size] }, + } })), + }); + const lhs_mask_mem: Memory = .{ + .base = .{ .reg = try self.copyToTmpRegister(Type.usize, lhs_mask_mcv.address()) }, + .mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } }, + }; + if (has_avx) try self.asmRegisterRegisterMemory( + .{ .vp_b, .shuf }, + lhs_temp_alias, + lhs_temp_alias, + lhs_mask_mem, + ) else try self.asmRegisterMemory( + .{ .p_b, .shuf }, + lhs_temp_alias, + lhs_mask_mem, + ); + + var rhs_mask_elems: [16]InternPool.Index = undefined; + for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| { + const elem_index = byte_index / elem_abi_size; + rhs_mask_elem.* = try mod.intern(.{ .int = .{ + .ty = .u8_type, + .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: { + const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000; + if (mask_elem >= 0) break :elem 0b1_00_00000; + const mask_elem_index: u31 = @intCast(~mask_elem); + const byte_off: u32 = @intCast(byte_index % elem_abi_size); + break :elem @intCast(mask_elem_index * elem_abi_size + byte_off); + } }, + } }); + } + const rhs_mask_ty = try mod.vectorType(.{ .len = max_abi_size, .child = .u8_type }); + const rhs_mask_mcv = try self.genTypedValue(.{ + .ty = rhs_mask_ty, + .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{ + .ty = rhs_mask_ty.toIntern(), + .storage = .{ .elems = rhs_mask_elems[0..max_abi_size] }, + } })), + }); + const rhs_mask_mem: Memory = .{ + .base = .{ .reg = try self.copyToTmpRegister(Type.usize, rhs_mask_mcv.address()) }, + .mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } }, + }; + if (has_avx) try self.asmRegisterRegisterMemory( + .{ .vp_b, .shuf }, + rhs_temp_alias, + rhs_temp_alias, + rhs_mask_mem, + ) else try self.asmRegisterMemory( + .{ .p_b, .shuf }, + rhs_temp_alias, + rhs_mask_mem, + ); + + if (has_avx) try self.asmRegisterRegisterRegister( + .{ switch (elem_ty.zigTypeTag(mod)) { + else => break :result null, + .Int => .vp_, + .Float => switch (elem_ty.floatBits(self.target.*)) { + 32 => .v_ps, + 64 => .v_pd, + 16, 80, 128 => break :result null, + else => unreachable, + }, + }, .@"or" }, + lhs_temp_alias, + lhs_temp_alias, + rhs_temp_alias, + ) else try self.asmRegisterRegister( + .{ switch (elem_ty.zigTypeTag(mod)) { + else => break :result null, + .Int => .p_, + .Float => switch (elem_ty.floatBits(self.target.*)) { + 32 => ._ps, + 64 => ._pd, + 16, 80, 128 => break :result null, + else => unreachable, + }, + }, .@"or" }, + lhs_temp_alias, + rhs_temp_alias, + ); + break :result .{ .register = temp_regs[0] }; + } + + if (max_abi_size <= 16) { + const lhs_mcv = try self.resolveInst(extra.a); + const lhs_reg = if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? + else + try self.copyToTmpRegister(lhs_ty, lhs_mcv); + const lhs_lock = self.register_manager.lockRegAssumeUnused(lhs_reg); + defer self.register_manager.unlockReg(lhs_lock); + + const rhs_mcv = try self.resolveInst(extra.b); + const rhs_reg = if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, rhs_mcv); + const rhs_lock = self.register_manager.lockReg(rhs_reg); + defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); + + //const dst_mcv = try self.register_manager.allocReg(inst, abi.RegisterClass.sse); + switch (elem_ty.zigTypeTag(mod)) { + .Float => switch (elem_ty.floatBits(self.target.*)) { + 16, 32 => {}, + 64 => unreachable, // fully handled by shufpd + 80, 128 => unreachable, // all possible masks already handled + else => unreachable, + }, + else => {}, + } + } + + break :result null; + }) orelse return self.fail("TODO implement airShuffle from {} and {} to {}", .{ + lhs_ty.fmt(mod), rhs_ty.fmt(mod), dst_ty.fmt(mod), + }); + return self.finishAir(inst, result, .{ extra.a, extra.b, .none }); } fn airReduce(self: *Self, inst: Air.Inst.Index) !void { @@ -17062,7 +17256,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { }, .Array, .Vector => { const elem_ty = result_ty.childType(mod); - if (result_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) { + if (result_ty.isVector(mod) and elem_ty.toIntern() == .bool_type) { const result_size: u32 = @intCast(result_ty.abiSize(mod)); const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); try self.asmRegisterRegister( @@ -18112,7 +18306,7 @@ fn splitType(self: *Self, ty: Type) ![2]Type { else => unreachable, }, .float => Type.f32, - .float_combine => try mod.vectorType(.{ .len = 2, .child = .f32_type }), + .float_combine => try mod.arrayType(.{ .len = 2, .child = .f32_type }), .sse => Type.f64, else => break, }; diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 5680d35ed5..29c80fce5f 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -324,7 +324,7 @@ pub const Mnemonic = enum { // SSE3 movddup, movshdup, movsldup, // SSSE3 - pabsb, pabsd, pabsw, palignr, + pabsb, pabsd, pabsw, palignr, pshufb, // SSE4.1 blendpd, blendps, blendvpd, blendvps, extractps, @@ -389,7 +389,7 @@ pub const Mnemonic = enum { vpmovmskb, vpmulhw, vpmulld, vpmullw, vpor, - vpshufd, vpshufhw, vpshuflw, + vpshufb, vpshufd, vpshufhw, vpshuflw, vpslld, vpslldq, vpsllq, vpsllw, vpsrad, vpsraq, vpsraw, vpsrld, vpsrldq, vpsrlq, vpsrlw, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index f1224e8ead..ac6d561ff3 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1185,6 +1185,8 @@ pub const table = [_]Entry{ .{ .palignr, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0f }, 0, .none, .ssse3 }, + .{ .pshufb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .none, .ssse3 }, + // SSE4.1 .{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 }, @@ -1593,6 +1595,8 @@ pub const table = [_]Entry{ .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx }, + .{ .vpshufb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_128_wig, .avx }, + .{ .vpshufd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x70 }, 0, .vex_128_wig, .avx }, .{ .vpshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .vex_128_wig, .avx }, @@ -1820,6 +1824,7 @@ pub const table = [_]Entry{ .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 }, + .{ .vpshufb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_256_wig, .avx2 }, .{ .vpshufd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 }, .{ .vpshufhw, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 }, diff --git a/src/codegen.zig b/src/codegen.zig index 118bab4be9..7bcba80065 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -405,7 +405,7 @@ pub fn generateSymbol( .vector_type => |vector_type| { const abi_size = math.cast(usize, typed_value.ty.abiSize(mod)) orelse return error.Overflow; - if (Type.fromInterned(vector_type.child).bitSize(mod) == 1) { + if (vector_type.child == .bool_type) { const bytes = try code.addManyAsSlice(abi_size); @memset(bytes, 0xaa); var index: usize = 0; @@ -443,37 +443,34 @@ pub fn generateSymbol( }, }) byte.* |= mask else byte.* &= ~mask; } - } else switch (aggregate.storage) { - .bytes => |bytes| try code.appendSlice(bytes), - .elems, .repeated_elem => { - var index: u64 = 0; - while (index < vector_type.len) : (index += 1) { - switch (try generateSymbol(bin_file, src_loc, .{ - .ty = Type.fromInterned(vector_type.child), - .val = Value.fromInterned(switch (aggregate.storage) { - .bytes => unreachable, - .elems => |elems| elems[ - math.cast(usize, index) orelse return error.Overflow - ], - .repeated_elem => |elem| elem, - }), - }, code, debug_output, reloc_info)) { - .ok => {}, - .fail => |em| return .{ .fail = em }, + } else { + switch (aggregate.storage) { + .bytes => |bytes| try code.appendSlice(bytes), + .elems, .repeated_elem => { + var index: u64 = 0; + while (index < vector_type.len) : (index += 1) { + switch (try generateSymbol(bin_file, src_loc, .{ + .ty = Type.fromInterned(vector_type.child), + .val = Value.fromInterned(switch (aggregate.storage) { + .bytes => unreachable, + .elems => |elems| elems[ + math.cast(usize, index) orelse return error.Overflow + ], + .repeated_elem => |elem| elem, + }), + }, code, debug_output, reloc_info)) { + .ok => {}, + .fail => |em| return .{ .fail = em }, + } } - } - }, - } + }, + } - const padding = abi_size - (math.cast(usize, math.divCeil( - u64, - Type.fromInterned(vector_type.child).bitSize(mod) * vector_type.len, - 8, - ) catch |err| switch (err) { - error.DivisionByZero => unreachable, - else => |e| return e, - }) orelse return error.Overflow); - if (padding > 0) try code.appendNTimes(0, padding); + const padding = abi_size - + (math.cast(usize, Type.fromInterned(vector_type.child).abiSize(mod) * vector_type.len) orelse + return error.Overflow); + if (padding > 0) try code.appendNTimes(0, padding); + } }, .anon_struct_type => |tuple| { const struct_begin = code.items.len; diff --git a/src/type.zig b/src/type.zig index a6265692c2..f27e157c31 100644 --- a/src/type.zig +++ b/src/type.zig @@ -905,11 +905,28 @@ pub const Type = struct { return Type.fromInterned(array_type.child).abiAlignmentAdvanced(mod, strat); }, .vector_type => |vector_type| { - const bits_u64 = try bitSizeAdvanced(Type.fromInterned(vector_type.child), mod, opt_sema); - const bits: u32 = @intCast(bits_u64); - const bytes = ((bits * vector_type.len) + 7) / 8; - const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes); - return .{ .scalar = Alignment.fromByteUnits(alignment) }; + if (vector_type.len == 0) return .{ .scalar = .@"1" }; + switch (mod.comp.getZigBackend()) { + else => { + const elem_bits: u32 = @intCast(try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema)); + if (elem_bits == 0) return .{ .scalar = .@"1" }; + const bytes = ((elem_bits * vector_type.len) + 7) / 8; + const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes); + return .{ .scalar = Alignment.fromByteUnits(alignment) }; + }, + .stage2_x86_64 => { + if (vector_type.child == .bool_type) return .{ .scalar = intAbiAlignment(@intCast(vector_type.len), target) }; + const elem_bytes: u32 = @intCast((try Type.fromInterned(vector_type.child).abiSizeAdvanced(mod, strat)).scalar); + if (elem_bytes == 0) return .{ .scalar = .@"1" }; + const bytes = elem_bytes * vector_type.len; + if (bytes > 32 and std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return .{ .scalar = .@"64" }; + if (bytes > 16 and std.Target.x86.featureSetHas( + target.cpu.features, + if (Type.fromInterned(vector_type.child).isRuntimeFloat()) .avx else .avx2, + )) return .{ .scalar = .@"32" }; + return .{ .scalar = .@"16" }; + }, + } }, .opt_type => return abiAlignmentAdvancedOptional(ty, mod, strat), @@ -1237,9 +1254,6 @@ pub const Type = struct { .storage = .{ .lazy_size = ty.toIntern() }, } }))) }, }; - const elem_bits = try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema); - const total_bits = elem_bits * vector_type.len; - const total_bytes = (total_bits + 7) / 8; const alignment = switch (try ty.abiAlignmentAdvanced(mod, strat)) { .scalar => |x| x, .val => return .{ .val = Value.fromInterned((try mod.intern(.{ .int = .{ @@ -1247,6 +1261,18 @@ pub const Type = struct { .storage = .{ .lazy_size = ty.toIntern() }, } }))) }, }; + const total_bytes = switch (mod.comp.getZigBackend()) { + else => total_bytes: { + const elem_bits = try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema); + const total_bits = elem_bits * vector_type.len; + break :total_bytes (total_bits + 7) / 8; + }, + .stage2_x86_64 => total_bytes: { + if (vector_type.child == .bool_type) break :total_bytes std.math.divCeil(u32, vector_type.len, 8) catch unreachable; + const elem_bytes: u32 = @intCast((try Type.fromInterned(vector_type.child).abiSizeAdvanced(mod, strat)).scalar); + break :total_bytes elem_bytes * vector_type.len; + }, + }; return AbiSizeAdvanced{ .scalar = alignment.forward(total_bytes) }; }, diff --git a/test/behavior/bitcast.zig b/test/behavior/bitcast.zig index 001f8c34db..3ac6115216 100644 --- a/test/behavior/bitcast.zig +++ b/test/behavior/bitcast.zig @@ -336,7 +336,7 @@ test "comptime @bitCast packed struct to int and back" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_llvm and native_endian == .big) { // https://github.com/ziglang/zig/issues/13782 diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 2ed29eb92d..c591a5a619 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -2441,6 +2441,7 @@ test "@intFromBool on vector" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const S = struct { fn doTheTest() !void { diff --git a/test/behavior/shuffle.zig b/test/behavior/shuffle.zig index 95913be3af..c3d760103d 100644 --- a/test/behavior/shuffle.zig +++ b/test/behavior/shuffle.zig @@ -4,10 +4,11 @@ const mem = std.mem; const expect = std.testing.expect; test "@shuffle int" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)) return error.SkipZigTest; const S = struct { fn doTheTest() !void { diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index cb9cd4a87a..9d21f8fdb0 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -29,7 +29,7 @@ test "vector wrap operators" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64 and - !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; const S = struct { fn doTheTest() !void { @@ -906,22 +906,26 @@ test "vector @reduce comptime" { } test "mask parameter of @shuffle is comptime scope" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)) return error.SkipZigTest; const __v4hi = @Vector(4, i16); - var v4_a = __v4hi{ 0, 0, 0, 0 }; - var v4_b = __v4hi{ 0, 0, 0, 0 }; + var v4_a = __v4hi{ 1, 2, 3, 4 }; + var v4_b = __v4hi{ 5, 6, 7, 8 }; _ = .{ &v4_a, &v4_b }; const shuffled: __v4hi = @shuffle(i16, v4_a, v4_b, @Vector(4, i32){ std.zig.c_translation.shuffleVectorIndex(0, @typeInfo(@TypeOf(v4_a)).Vector.len), - std.zig.c_translation.shuffleVectorIndex(0, @typeInfo(@TypeOf(v4_a)).Vector.len), - std.zig.c_translation.shuffleVectorIndex(0, @typeInfo(@TypeOf(v4_a)).Vector.len), - std.zig.c_translation.shuffleVectorIndex(0, @typeInfo(@TypeOf(v4_a)).Vector.len), + std.zig.c_translation.shuffleVectorIndex(2, @typeInfo(@TypeOf(v4_a)).Vector.len), + std.zig.c_translation.shuffleVectorIndex(4, @typeInfo(@TypeOf(v4_a)).Vector.len), + std.zig.c_translation.shuffleVectorIndex(6, @typeInfo(@TypeOf(v4_a)).Vector.len), }); - _ = shuffled; + try expect(shuffled[0] == 1); + try expect(shuffled[1] == 3); + try expect(shuffled[2] == 5); + try expect(shuffled[3] == 7); } test "saturating add" { @@ -1177,10 +1181,22 @@ test "@shlWithOverflow" { } test "alignment of vectors" { - try expect(@alignOf(@Vector(2, u8)) == 2); - try expect(@alignOf(@Vector(2, u1)) == 1); - try expect(@alignOf(@Vector(1, u1)) == 1); - try expect(@alignOf(@Vector(2, u16)) == 4); + try expect(@alignOf(@Vector(2, u8)) == switch (builtin.zig_backend) { + else => 2, + .stage2_x86_64 => 16, + }); + try expect(@alignOf(@Vector(2, u1)) == switch (builtin.zig_backend) { + else => 1, + .stage2_x86_64 => 16, + }); + try expect(@alignOf(@Vector(1, u1)) == switch (builtin.zig_backend) { + else => 1, + .stage2_x86_64 => 16, + }); + try expect(@alignOf(@Vector(2, u16)) == switch (builtin.zig_backend) { + else => 4, + .stage2_x86_64 => 16, + }); } test "loading the second vector from a slice of vectors" { @@ -1316,10 +1332,10 @@ test "modRem with zero divisor" { test "array operands to shuffle are coerced to vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; const mask = [5]i32{ -1, 0, 1, 2, 3 };