x86_64: implement @shuffle

Commit 2fdc9e6ae8 (parent defef3f1a1)
Mirror of https://github.com/ziglang/zig.git, synced 2025-12-06 06:13:07 +00:00
@@ -200,7 +200,8 @@ const CompressGeneric = struct {
}
};

const compress = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64)
const compress = if (builtin.cpu.arch == .x86_64 and
(builtin.zig_backend != .stage2_x86_64 or std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)))
CompressVectorized.compress
else
CompressGeneric.compress;

@@ -302,7 +302,11 @@ fn SalsaNonVecImpl(comptime rounds: comptime_int) type {
};
}

const SalsaImpl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64) SalsaVecImpl else SalsaNonVecImpl;
const SalsaImpl = if (builtin.cpu.arch == .x86_64 and
(builtin.zig_backend != .stage2_x86_64 or std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)))
SalsaVecImpl
else
SalsaNonVecImpl;

fn keyToWords(key: [32]u8) [8]u32 {
var k: [8]u32 = undefined;

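The crypto changes above follow the standard comptime dispatch idiom: the implementation is picked from builtin.cpu while compiling, so the self-hosted x86_64 backend only gets the vectorized path when SSSE3 is actually available. A minimal sketch of that selection pattern, with placeholder implementations rather than the real BLAKE3/Salsa code:

const std = @import("std");
const builtin = @import("builtin");

fn implGeneric(x: u32) u32 {
    return x +% 1; // portable scalar fallback
}

fn implVectorized(x: u32) u32 {
    return x +% 1; // stands in for an SSSE3-accelerated path
}

// Resolved entirely at compile time; the rejected branch is never emitted.
const impl = if (builtin.cpu.arch == .x86_64 and
    (builtin.zig_backend != .stage2_x86_64 or
    std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)))
    implVectorized
else
    implGeneric;

test "comptime-selected implementation" {
    try std.testing.expect(impl(1) == 2);
}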
@@ -1286,5 +1286,6 @@ test "hasUniqueRepresentation" {
try testing.expect(!hasUniqueRepresentation([]u8));
try testing.expect(!hasUniqueRepresentation([]const u8));

try testing.expect(hasUniqueRepresentation(@Vector(4, u16)));
try testing.expect(hasUniqueRepresentation(@Vector(std.simd.suggestVectorLength(u8) orelse 1, u8)));
try testing.expect(@sizeOf(@Vector(3, u8)) == 3 or !hasUniqueRepresentation(@Vector(3, u8)));
}

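The new @Vector(3, u8) assertion relies on the rule that a type only has a unique in-memory representation when no padding participates in its ABI size; a 3-byte vector is usually padded, and the padding bytes can hold anything. A hedged illustration of that check (this helper is invented for the example and is not the std.meta implementation):

const std = @import("std");

// Hypothetical check: every bit of the ABI size must be payload.
fn vectorHasUniqueRepresentation(comptime T: type) bool {
    return @bitSizeOf(T) == 8 * @sizeOf(T);
}

test "padding makes a vector representation non-unique" {
    // A full native-width byte vector has no padding, mirroring the test above.
    const N = std.simd.suggestVectorLength(u8) orelse 1;
    try std.testing.expect(vectorHasUniqueRepresentation(@Vector(N, u8)));
    // @Vector(3, u8) is padded on most targets (to 4 or even 16 bytes), in which
    // case the padding byte(s) make the representation non-unique.
    try std.testing.expect(@sizeOf(@Vector(3, u8)) == 3 or
        !vectorHasUniqueRepresentation(@Vector(3, u8)));
}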
@@ -239,7 +239,7 @@ pub fn utf8ValidateSlice(input: []const u8) bool {
fn utf8ValidateSliceImpl(input: []const u8, comptime surrogates: Surrogates) bool {
var remaining = input;

const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
if (std.simd.suggestVectorLength(u8)) |chunk_len| {
const Chunk = @Vector(chunk_len, u8);

// Fast path. Check for and skip ASCII characters at the start of the input.
@@ -252,6 +252,7 @@ fn utf8ValidateSliceImpl(input: []const u8, comptime surrogates: Surrogates) boo
}
remaining = remaining[chunk_len..];
}
}

// default lowest and highest continuation byte
const lo_cb = 0b10000000;
@@ -937,8 +938,11 @@ fn utf16LeToUtf8ArrayListImpl(
try array_list.ensureTotalCapacityPrecise(utf16le.len);

var remaining = utf16le;
if (builtin.zig_backend != .stage2_x86_64) {
const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
if (builtin.zig_backend != .stage2_x86_64 or
comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and
!std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx })))
vectorized: {
const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u16);

// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -1029,8 +1033,11 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr
var end_index: usize = 0;

var remaining = utf16le;
if (builtin.zig_backend != .stage2_x86_64) {
const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
if (builtin.zig_backend != .stage2_x86_64 or
comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and
!std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx })))
vectorized: {
const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u16);

// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -1155,8 +1162,12 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8,

var remaining = utf8;
// Need support for std.simd.interlace
if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
if ((builtin.zig_backend != .stage2_x86_64 or
comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and
!std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) and
comptime !builtin.cpu.arch.isMIPS())
vectorized: {
const chunk_len = @divExact(std.simd.suggestVectorLength(u8) orelse break :vectorized, 2);
const Chunk = @Vector(chunk_len, u8);

// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -1232,8 +1243,12 @@ pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates:

var remaining = utf8;
// Need support for std.simd.interlace
if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
if ((builtin.zig_backend != .stage2_x86_64 or
comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and
!std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) and
comptime !builtin.cpu.arch.isMIPS())
vectorized: {
const chunk_len = @divExact(std.simd.suggestVectorLength(u8) orelse break :vectorized, 2);
const Chunk = @Vector(chunk_len, u8);

// Fast path. Check for and encode ASCII characters at the start of the input.

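All of the rewritten guards in std.unicode share one shape: the SIMD fast path lives in a labeled block and bails out with break :vectorized when std.simd.suggestVectorLength reports no usable vector width (or the backend/feature gate fails), leaving the scalar loop after the block as the only code path. A stripped-down sketch of that control flow; the ASCII-counting logic here is invented for illustration and is not the function being patched:

const std = @import("std");

fn countAscii(input: []const u8) usize {
    var count: usize = 0;
    var remaining = input;
    vectorized: {
        // No suitable vector length: skip the SIMD path entirely.
        const chunk_len = std.simd.suggestVectorLength(u8) orelse break :vectorized;
        const Chunk = @Vector(chunk_len, u8);
        while (remaining.len >= chunk_len) {
            const chunk: Chunk = remaining[0..chunk_len].*;
            const is_ascii = chunk < @as(Chunk, @splat(0x80));
            count += std.simd.countTrues(is_ascii);
            remaining = remaining[chunk_len..];
        }
    }
    // Scalar tail, and the whole input when the block above was skipped.
    for (remaining) |byte| count += @intFromBool(byte < 0x80);
    return count;
}

test "countAscii" {
    try std.testing.expectEqual(@as(usize, 5), countAscii("hello"));
}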
@@ -308,14 +308,12 @@ test "promoteIntLiteral" {

/// Convert from clang __builtin_shufflevector index to Zig @shuffle index
/// clang requires __builtin_shufflevector index arguments to be integer constants.
/// negative values for `this_index` indicate "don't care" so we arbitrarily choose 0
/// negative values for `this_index` indicate "don't care".
/// clang enforces that `this_index` is less than the total number of vector elements
/// See https://ziglang.org/documentation/master/#shuffle
/// See https://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector
pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len: usize) i32 {
if (this_index <= 0) return 0;

const positive_index = @as(usize, @intCast(this_index));
const positive_index = std.math.cast(usize, this_index) orelse return undefined;
if (positive_index < source_vector_len) return @as(i32, @intCast(this_index));
const b_index = positive_index - source_vector_len;
return ~@as(i32, @intCast(b_index));
@@ -324,7 +322,7 @@ pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len
test "shuffleVectorIndex" {
const vector_len: usize = 4;

try testing.expect(shuffleVectorIndex(-1, vector_len) == 0);
_ = shuffleVectorIndex(-1, vector_len);

try testing.expect(shuffleVectorIndex(0, vector_len) == 0);
try testing.expect(shuffleVectorIndex(1, vector_len) == 1);

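For reference, Zig's @shuffle reads a non-negative mask element i as "take element i of a" and a negative mask element m as "take element ~m of b", which is why shuffleVectorIndex returns ~b_index once an index points past the first operand. A small hedged example of that mapping, using the same public function the test above exercises (values chosen only for illustration):

const std = @import("std");

test "clang shufflevector indices map onto @shuffle indices" {
    const a = @Vector(4, u8){ 10, 11, 12, 13 };
    const b = @Vector(4, u8){ 20, 21, 22, 23 };
    const idx = std.zig.c_translation.shuffleVectorIndex;
    // clang-style flat indices: 0..3 select from a, 4..7 select from b.
    // idx(5, 4) == ~1, i.e. "element 1 of b" in @shuffle terms.
    const mask = comptime @Vector(4, i32){ idx(0, 4), idx(5, 4), idx(2, 4), idx(7, 4) };
    const shuffled = @shuffle(u8, a, b, mask);
    try std.testing.expect(@reduce(.And, shuffled == @Vector(4, u8){ 10, 21, 12, 23 }));
}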
@@ -3587,6 +3587,7 @@ pub const Alignment = enum(u6) {
@"8" = 3,
@"16" = 4,
@"32" = 5,
@"64" = 6,
none = std.math.maxInt(u6),
_,


@@ -2610,7 +2610,8 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt

const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).Array.len]RegisterLock;
var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
if (opts.update_tracking) ({}) else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
if (opts.update_tracking)
{} else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);

var reg_locks = if (opts.update_tracking) {} else try std.ArrayList(RegisterLock).initCapacity(
stack.get(),
@@ -14116,30 +14117,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
else => {},
},
.Int => switch (ty.childType(mod).intInfo(mod).bits) {
8 => switch (ty.vectorLen(mod)) {
1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
.insert = .{ .vp_b, .insr },
.extract = .{ .vp_b, .extr },
} } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
.insert = .{ .p_b, .insr },
.extract = .{ .p_b, .extr },
} },
2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
} },
3...4 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_d, .mov }
else
.{ ._d, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_q, .mov }
else
.{ ._q, .mov } },
9...16 => return .{ .move = if (self.hasFeature(.avx))
1...8 => switch (ty.vectorLen(mod)) {
1...16 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
17...32 => if (self.hasFeature(.avx))
@@ -14149,23 +14128,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
16 => switch (ty.vectorLen(mod)) {
1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
} },
2 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_d, .mov }
else
.{ ._d, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_q, .mov }
else
.{ ._q, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
9...16 => switch (ty.vectorLen(mod)) {
1...8 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
9...16 => if (self.hasFeature(.avx))
@@ -14175,16 +14139,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
32 => switch (ty.vectorLen(mod)) {
1 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_d, .mov }
else
.{ ._d, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_q, .mov }
else
.{ ._q, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
17...32 => switch (ty.vectorLen(mod)) {
1...4 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
5...8 => if (self.hasFeature(.avx))
@@ -14194,12 +14150,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
64 => switch (ty.vectorLen(mod)) {
1 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_q, .mov }
else
.{ ._q, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
33...64 => switch (ty.vectorLen(mod)) {
1...2 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
3...4 => if (self.hasFeature(.avx))
@@ -14209,7 +14161,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
128 => switch (ty.vectorLen(mod)) {
65...128 => switch (ty.vectorLen(mod)) {
1 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
@@ -14220,7 +14172,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
256 => switch (ty.vectorLen(mod)) {
129...256 => switch (ty.vectorLen(mod)) {
1 => if (self.hasFeature(.avx))
return .{ .move = if (aligned)
.{ .v_, .movdqa }
@@ -14232,11 +14184,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
},
.Pointer, .Optional => if (ty.childType(mod).isPtrAtRuntime(mod))
switch (ty.vectorLen(mod)) {
1 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_q, .mov }
else
.{ ._q, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
1...2 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
3...4 => if (self.hasFeature(.avx))
@@ -14250,22 +14198,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
unreachable,
.Float => switch (ty.childType(mod).floatBits(self.target.*)) {
16 => switch (ty.vectorLen(mod)) {
1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
.insert = .{ .vp_w, .insr },
.extract = .{ .vp_w, .extr },
} } else .{ .insert_extract = .{
.insert = .{ .p_w, .insr },
.extract = .{ .p_w, .extr },
} },
2 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_d, .mov }
else
.{ ._d, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_q, .mov }
else
.{ ._q, .mov } },
5...8 => return .{ .move = if (self.hasFeature(.avx))
1...8 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
9...16 => if (self.hasFeature(.avx))
@@ -14276,15 +14209,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
else => {},
},
32 => switch (ty.vectorLen(mod)) {
1 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_ss, .mov }
else
.{ ._ss, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
else
.{ ._sd, .mov } },
3...4 => return .{ .move = if (self.hasFeature(.avx))
1...4 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
5...8 => if (self.hasFeature(.avx))
@@ -14295,11 +14220,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
else => {},
},
64 => switch (ty.vectorLen(mod)) {
1 => return .{ .move = if (self.hasFeature(.avx))
.{ .v_sd, .mov }
else
.{ ._sd, .mov } },
2 => return .{ .move = if (self.hasFeature(.avx))
1...2 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
3...4 => if (self.hasFeature(.avx))
@@ -16551,7 +16472,7 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
const vec_len = ty.vectorLen(mod);
const elem_ty = ty.childType(mod);
const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod));
const abi_size = elem_abi_size * vec_len;
const abi_size: u32 = @intCast(ty.abiSize(mod));
const pred_ty = self.typeOf(pl_op.operand);

const result = result: {
@@ -16882,10 +16803,283 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
}

fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
const mod = self.bin_file.comp.module.?;
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
_ = ty_pl;
return self.fail("TODO implement airShuffle for x86_64", .{});
//return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;

const dst_ty = self.typeOfIndex(inst);
const elem_ty = dst_ty.childType(mod);
const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod));
const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
const lhs_ty = self.typeOf(extra.a);
const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(mod));
const rhs_ty = self.typeOf(extra.b);
const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(mod));
const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size);

const ExpectedContents = [32]?i32;
var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
const allocator = stack.get();

const mask_elems = try allocator.alloc(?i32, extra.mask_len);
defer allocator.free(mask_elems);
for (mask_elems, 0..) |*mask_elem, elem_index| {
const mask_elem_val = Value
.fromInterned(extra.mask).elemValue(mod, elem_index) catch unreachable;
mask_elem.* = if (mask_elem_val.isUndef(mod))
null
else
@intCast(mask_elem_val.toSignedInt(mod));
}

const result = @as(?MCValue, result: {
for (mask_elems) |mask_elem| {
if (mask_elem) |_| break;
} else break :result try self.allocRegOrMem(inst, true);

for (mask_elems, 0..) |mask_elem, elem_index| {
if (mask_elem orelse continue != @as(i32, @intCast(elem_index))) break;
} else {
const lhs_mcv = try self.resolveInst(extra.a);
if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv;
const dst_mcv = try self.allocRegOrMem(inst, true);
try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{});
break :result dst_mcv;
}

for (mask_elems, 0..) |mask_elem, elem_index| {
if (mask_elem orelse continue != ~@as(i32, @intCast(elem_index))) break;
} else {
const rhs_mcv = try self.resolveInst(extra.b);
if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv;
const dst_mcv = try self.allocRegOrMem(inst, true);
try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{});
break :result dst_mcv;
}

const has_avx = self.hasFeature(.avx);
shufpd: {
if (elem_abi_size != 8) break :shufpd;
if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufpd;

var control: u4 = 0b0_0_0_0;
var sources = [1]?u1{null} ** 2;
for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
const mask_elem = maybe_mask_elem orelse continue;
const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd;

const source = @intFromBool(mask_elem < 0);
if (sources[elem_index & 0b01]) |prev_source| {
if (source != prev_source) break :shufpd;
} else sources[elem_index & 0b01] = source;

control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index);
}
if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd;

const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
const operand_tys = [2]Type{ lhs_ty, rhs_ty };
const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
const rhs_mcv = try self.resolveInst(operands[sources[1].?]);

const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
lhs_mcv
else if (has_avx and lhs_mcv.isRegister())
.{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
else
try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
const dst_reg = dst_mcv.getReg().?;
const dst_alias = registerAlias(dst_reg, max_abi_size);

if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
.{ .v_pd, .shuf },
dst_alias,
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
Immediate.u(control),
) else try self.asmRegisterRegisterRegisterImmediate(
.{ .v_pd, .shuf },
dst_alias,
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
registerAlias(if (rhs_mcv.isRegister())
rhs_mcv.getReg().?
else
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
Immediate.u(control),
) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
.{ ._pd, .shuf },
dst_alias,
try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
Immediate.u(control),
) else try self.asmRegisterRegisterImmediate(
.{ ._pd, .shuf },
dst_alias,
registerAlias(if (rhs_mcv.isRegister())
rhs_mcv.getReg().?
else
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
Immediate.u(control),
);
break :result dst_mcv;
}

pshufb: {
if (max_abi_size > 16) break :pshufb;
if (!self.hasFeature(.ssse3)) break :pshufb;

const temp_regs =
try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse);
const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs);
defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);

const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size);
try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{});

const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size);
try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{});

var lhs_mask_elems: [16]InternPool.Index = undefined;
for (lhs_mask_elems[0..max_abi_size], 0..) |*lhs_mask_elem, byte_index| {
const elem_index = byte_index / elem_abi_size;
lhs_mask_elem.* = try mod.intern(.{ .int = .{
.ty = .u8_type,
.storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
if (mask_elem < 0) break :elem 0b1_00_00000;
const mask_elem_index: u31 = @intCast(mask_elem);
const byte_off: u32 = @intCast(byte_index % elem_abi_size);
break :elem @intCast(mask_elem_index * elem_abi_size + byte_off);
} },
} });
}
const lhs_mask_ty = try mod.vectorType(.{ .len = max_abi_size, .child = .u8_type });
const lhs_mask_mcv = try self.genTypedValue(.{
.ty = lhs_mask_ty,
.val = Value.fromInterned(try mod.intern(.{ .aggregate = .{
.ty = lhs_mask_ty.toIntern(),
.storage = .{ .elems = lhs_mask_elems[0..max_abi_size] },
} })),
});
const lhs_mask_mem: Memory = .{
.base = .{ .reg = try self.copyToTmpRegister(Type.usize, lhs_mask_mcv.address()) },
.mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } },
};
if (has_avx) try self.asmRegisterRegisterMemory(
.{ .vp_b, .shuf },
lhs_temp_alias,
lhs_temp_alias,
lhs_mask_mem,
) else try self.asmRegisterMemory(
.{ .p_b, .shuf },
lhs_temp_alias,
lhs_mask_mem,
);

var rhs_mask_elems: [16]InternPool.Index = undefined;
for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| {
const elem_index = byte_index / elem_abi_size;
rhs_mask_elem.* = try mod.intern(.{ .int = .{
.ty = .u8_type,
.storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
if (mask_elem >= 0) break :elem 0b1_00_00000;
const mask_elem_index: u31 = @intCast(~mask_elem);
const byte_off: u32 = @intCast(byte_index % elem_abi_size);
break :elem @intCast(mask_elem_index * elem_abi_size + byte_off);
} },
} });
}
const rhs_mask_ty = try mod.vectorType(.{ .len = max_abi_size, .child = .u8_type });
const rhs_mask_mcv = try self.genTypedValue(.{
.ty = rhs_mask_ty,
.val = Value.fromInterned(try mod.intern(.{ .aggregate = .{
.ty = rhs_mask_ty.toIntern(),
.storage = .{ .elems = rhs_mask_elems[0..max_abi_size] },
} })),
});
const rhs_mask_mem: Memory = .{
.base = .{ .reg = try self.copyToTmpRegister(Type.usize, rhs_mask_mcv.address()) },
.mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } },
};
if (has_avx) try self.asmRegisterRegisterMemory(
.{ .vp_b, .shuf },
rhs_temp_alias,
rhs_temp_alias,
rhs_mask_mem,
) else try self.asmRegisterMemory(
.{ .p_b, .shuf },
rhs_temp_alias,
rhs_mask_mem,
);

if (has_avx) try self.asmRegisterRegisterRegister(
.{ switch (elem_ty.zigTypeTag(mod)) {
else => break :result null,
.Int => .vp_,
.Float => switch (elem_ty.floatBits(self.target.*)) {
32 => .v_ps,
64 => .v_pd,
16, 80, 128 => break :result null,
else => unreachable,
},
}, .@"or" },
lhs_temp_alias,
lhs_temp_alias,
rhs_temp_alias,
) else try self.asmRegisterRegister(
.{ switch (elem_ty.zigTypeTag(mod)) {
else => break :result null,
.Int => .p_,
.Float => switch (elem_ty.floatBits(self.target.*)) {
32 => ._ps,
64 => ._pd,
16, 80, 128 => break :result null,
else => unreachable,
},
}, .@"or" },
lhs_temp_alias,
rhs_temp_alias,
);
break :result .{ .register = temp_regs[0] };
}

if (max_abi_size <= 16) {
const lhs_mcv = try self.resolveInst(extra.a);
const lhs_reg = if (lhs_mcv.isRegister())
lhs_mcv.getReg().?
else
try self.copyToTmpRegister(lhs_ty, lhs_mcv);
const lhs_lock = self.register_manager.lockRegAssumeUnused(lhs_reg);
defer self.register_manager.unlockReg(lhs_lock);

const rhs_mcv = try self.resolveInst(extra.b);
const rhs_reg = if (rhs_mcv.isRegister())
rhs_mcv.getReg().?
else
try self.copyToTmpRegister(rhs_ty, rhs_mcv);
const rhs_lock = self.register_manager.lockReg(rhs_reg);
defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);

//const dst_mcv = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
switch (elem_ty.zigTypeTag(mod)) {
.Float => switch (elem_ty.floatBits(self.target.*)) {
16, 32 => {},
64 => unreachable, // fully handled by shufpd
80, 128 => unreachable, // all possible masks already handled
else => unreachable,
},
else => {},
}
}

break :result null;
}) orelse return self.fail("TODO implement airShuffle from {} and {} to {}", .{
lhs_ty.fmt(mod), rhs_ty.fmt(mod), dst_ty.fmt(mod),
});
return self.finishAir(inst, result, .{ extra.a, extra.b, .none });
}

fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
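In the pshufb path above, an arbitrary @shuffle is lowered to two byte-granular PSHUFB masks plus a bitwise OR: every destination byte either names a source byte inside one operand (element index times element size, plus the byte offset within the element) or carries a set high bit (the 0b1_00_00000 constants), which makes PSHUFB write zero for that byte; OR-ing the two partially-populated registers then merges the lanes taken from a with the lanes taken from b. A standalone sketch of that mask construction, simplified to plain i32 masks with no undef handling (the helper name and shape are invented, not the backend's API):

const std = @import("std");

// Hypothetical mirror of the byte-mask construction: for every destination
// byte, either point at a byte of the chosen operand (0 = a, 1 = b) or emit
// 0x80 so PSHUFB zeroes that byte.
fn pshufbMask(comptime elem_size: usize, comptime n: usize, mask: [n]i32, which: u1) [n * elem_size]u8 {
    var bytes: [n * elem_size]u8 = undefined;
    for (&bytes, 0..) |*byte, byte_index| {
        const m = mask[byte_index / elem_size];
        const from_b = m < 0;
        if (@intFromBool(from_b) != which) {
            byte.* = 0x80; // high bit set: PSHUFB writes 0 here
            continue;
        }
        const src_elem: usize = @intCast(if (from_b) ~m else m);
        byte.* = @intCast(src_elem * elem_size + byte_index % elem_size);
    }
    return bytes;
}

test "pshufb byte masks for a 4 x u16 shuffle" {
    // Mask { 0, ~1, 2, ~3 } selects a[0], b[1], a[2], b[3].
    const mask = [4]i32{ 0, ~@as(i32, 1), 2, ~@as(i32, 3) };
    const lhs = pshufbMask(2, 4, mask, 0);
    const rhs = pshufbMask(2, 4, mask, 1);
    try std.testing.expectEqualSlices(u8, &.{ 0, 1, 0x80, 0x80, 4, 5, 0x80, 0x80 }, &lhs);
    try std.testing.expectEqualSlices(u8, &.{ 0x80, 0x80, 2, 3, 0x80, 0x80, 6, 7 }, &rhs);
}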
@@ -17062,7 +17256,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
},
.Array, .Vector => {
const elem_ty = result_ty.childType(mod);
if (result_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) {
if (result_ty.isVector(mod) and elem_ty.toIntern() == .bool_type) {
const result_size: u32 = @intCast(result_ty.abiSize(mod));
const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
try self.asmRegisterRegister(
@@ -18112,7 +18306,7 @@ fn splitType(self: *Self, ty: Type) ![2]Type {
else => unreachable,
},
.float => Type.f32,
.float_combine => try mod.vectorType(.{ .len = 2, .child = .f32_type }),
.float_combine => try mod.arrayType(.{ .len = 2, .child = .f32_type }),
.sse => Type.f64,
else => break,
};

@@ -324,7 +324,7 @@ pub const Mnemonic = enum {
// SSE3
movddup, movshdup, movsldup,
// SSSE3
pabsb, pabsd, pabsw, palignr,
pabsb, pabsd, pabsw, palignr, pshufb,
// SSE4.1
blendpd, blendps, blendvpd, blendvps,
extractps,
@@ -389,7 +389,7 @@ pub const Mnemonic = enum {
vpmovmskb,
vpmulhw, vpmulld, vpmullw,
vpor,
vpshufd, vpshufhw, vpshuflw,
vpshufb, vpshufd, vpshufhw, vpshuflw,
vpslld, vpslldq, vpsllq, vpsllw,
vpsrad, vpsraq, vpsraw,
vpsrld, vpsrldq, vpsrlq, vpsrlw,

@@ -1185,6 +1185,8 @@ pub const table = [_]Entry{

.{ .palignr, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0f }, 0, .none, .ssse3 },

.{ .pshufb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .none, .ssse3 },

// SSE4.1
.{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },

@@ -1593,6 +1595,8 @@ pub const table = [_]Entry{

.{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },

.{ .vpshufb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_128_wig, .avx },

.{ .vpshufd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x70 }, 0, .vex_128_wig, .avx },

.{ .vpshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .vex_128_wig, .avx },
@@ -1820,6 +1824,7 @@ pub const table = [_]Entry{

.{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },

.{ .vpshufb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_256_wig, .avx2 },
.{ .vpshufd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 },

.{ .vpshufhw, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 },

@@ -405,7 +405,7 @@ pub fn generateSymbol(
.vector_type => |vector_type| {
const abi_size = math.cast(usize, typed_value.ty.abiSize(mod)) orelse
return error.Overflow;
if (Type.fromInterned(vector_type.child).bitSize(mod) == 1) {
if (vector_type.child == .bool_type) {
const bytes = try code.addManyAsSlice(abi_size);
@memset(bytes, 0xaa);
var index: usize = 0;
@@ -443,7 +443,8 @@ pub fn generateSymbol(
},
}) byte.* |= mask else byte.* &= ~mask;
}
} else switch (aggregate.storage) {
} else {
switch (aggregate.storage) {
.bytes => |bytes| try code.appendSlice(bytes),
.elems, .repeated_elem => {
var index: u64 = 0;
@@ -465,15 +466,11 @@ pub fn generateSymbol(
},
}

const padding = abi_size - (math.cast(usize, math.divCeil(
u64,
Type.fromInterned(vector_type.child).bitSize(mod) * vector_type.len,
8,
) catch |err| switch (err) {
error.DivisionByZero => unreachable,
else => |e| return e,
}) orelse return error.Overflow);
const padding = abi_size -
(math.cast(usize, Type.fromInterned(vector_type.child).abiSize(mod) * vector_type.len) orelse
return error.Overflow);
if (padding > 0) try code.appendNTimes(0, padding);
}
},
.anon_struct_type => |tuple| {
const struct_begin = code.items.len;

src/type.zig
@@ -905,12 +905,29 @@ pub const Type = struct {
return Type.fromInterned(array_type.child).abiAlignmentAdvanced(mod, strat);
},
.vector_type => |vector_type| {
const bits_u64 = try bitSizeAdvanced(Type.fromInterned(vector_type.child), mod, opt_sema);
const bits: u32 = @intCast(bits_u64);
const bytes = ((bits * vector_type.len) + 7) / 8;
if (vector_type.len == 0) return .{ .scalar = .@"1" };
switch (mod.comp.getZigBackend()) {
else => {
const elem_bits: u32 = @intCast(try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema));
if (elem_bits == 0) return .{ .scalar = .@"1" };
const bytes = ((elem_bits * vector_type.len) + 7) / 8;
const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes);
return .{ .scalar = Alignment.fromByteUnits(alignment) };
},
.stage2_x86_64 => {
if (vector_type.child == .bool_type) return .{ .scalar = intAbiAlignment(@intCast(vector_type.len), target) };
const elem_bytes: u32 = @intCast((try Type.fromInterned(vector_type.child).abiSizeAdvanced(mod, strat)).scalar);
if (elem_bytes == 0) return .{ .scalar = .@"1" };
const bytes = elem_bytes * vector_type.len;
if (bytes > 32 and std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return .{ .scalar = .@"64" };
if (bytes > 16 and std.Target.x86.featureSetHas(
target.cpu.features,
if (Type.fromInterned(vector_type.child).isRuntimeFloat()) .avx else .avx2,
)) return .{ .scalar = .@"32" };
return .{ .scalar = .@"16" };
},
}
},

.opt_type => return abiAlignmentAdvancedOptional(ty, mod, strat),
.error_union_type => |info| return abiAlignmentAdvancedErrorUnion(ty, mod, strat, Type.fromInterned(info.payload_type)),
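Worked through, the new stage2_x86_64 branch derives a vector's alignment from the total payload size in bytes and the available register width, instead of rounding the packed bit count up to a power of two as the generic branch still does. A hedged sketch of the same decision tree with the feature checks turned into plain parameters (helper invented for illustration; bool vectors, which the real code aligns like the packed integer, are left out):

const std = @import("std");

fn x86VectorAlignment(elem_bytes: u32, len: u32, has_avx512f: bool, has_avx_or_avx2: bool) u32 {
    const bytes = elem_bytes * len;
    if (bytes == 0) return 1;
    if (bytes > 32 and has_avx512f) return 64; // only fits a ZMM register
    if (bytes > 16 and has_avx_or_avx2) return 32; // YMM
    return 16; // XMM
}

test "vector alignment under the x86_64 backend rule" {
    // @Vector(2, u16): 4 payload bytes -> 16-byte alignment here, whereas the
    // generic rule gives ceilPowerOfTwo(4) = 4 (compare the behavior test below).
    try std.testing.expectEqual(@as(u32, 16), x86VectorAlignment(2, 2, false, false));
    // @Vector(8, f32): 32 bytes -> 32-byte alignment once AVX is available.
    try std.testing.expectEqual(@as(u32, 32), x86VectorAlignment(4, 8, false, true));
}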
@@ -1237,9 +1254,6 @@ pub const Type = struct {
.storage = .{ .lazy_size = ty.toIntern() },
} }))) },
};
const elem_bits = try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema);
const total_bits = elem_bits * vector_type.len;
const total_bytes = (total_bits + 7) / 8;
const alignment = switch (try ty.abiAlignmentAdvanced(mod, strat)) {
.scalar => |x| x,
.val => return .{ .val = Value.fromInterned((try mod.intern(.{ .int = .{
@@ -1247,6 +1261,18 @@ pub const Type = struct {
.storage = .{ .lazy_size = ty.toIntern() },
} }))) },
};
const total_bytes = switch (mod.comp.getZigBackend()) {
else => total_bytes: {
const elem_bits = try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema);
const total_bits = elem_bits * vector_type.len;
break :total_bytes (total_bits + 7) / 8;
},
.stage2_x86_64 => total_bytes: {
if (vector_type.child == .bool_type) break :total_bytes std.math.divCeil(u32, vector_type.len, 8) catch unreachable;
const elem_bytes: u32 = @intCast((try Type.fromInterned(vector_type.child).abiSizeAdvanced(mod, strat)).scalar);
break :total_bytes elem_bytes * vector_type.len;
},
};
return AbiSizeAdvanced{ .scalar = alignment.forward(total_bytes) };
},


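The size computation follows the same backend split: the generic branch still packs bits, while stage2_x86_64 takes elem_abi_size * len (bit-packing only @Vector(N, bool)) and then rounds the result up to the alignment chosen above. A short worked check of that arithmetic with plain integers rather than the Type API:

const std = @import("std");

fn forwardToAlign(bytes: u64, alignment: u64) u64 {
    return (bytes + alignment - 1) / alignment * alignment;
}

test "abi size of @Vector(3, u8) under both rules" {
    // Generic rule: ceil(3 * 8 bits / 8) = 3 bytes, alignment 4 -> size 4.
    try std.testing.expectEqual(@as(u64, 4), forwardToAlign((3 * 8 + 7) / 8, 4));
    // stage2_x86_64 rule: 3 * 1 byte = 3 bytes, alignment 16 -> size 16.
    try std.testing.expectEqual(@as(u64, 16), forwardToAlign(3 * 1, 16));
}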
@@ -336,7 +336,7 @@ test "comptime @bitCast packed struct to int and back" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;

if (builtin.zig_backend == .stage2_llvm and native_endian == .big) {
// https://github.com/ziglang/zig/issues/13782

@@ -2441,6 +2441,7 @@ test "@intFromBool on vector" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;

const S = struct {
fn doTheTest() !void {

@@ -4,10 +4,11 @@ const mem = std.mem;
const expect = std.testing.expect;

test "@shuffle int" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64 and
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)) return error.SkipZigTest;

const S = struct {
fn doTheTest() !void {

@@ -29,7 +29,7 @@ test "vector wrap operators" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64 and
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest;

const S = struct {
fn doTheTest() !void {
@@ -906,22 +906,26 @@ test "vector @reduce comptime" {
}

test "mask parameter of @shuffle is comptime scope" {
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64 and
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)) return error.SkipZigTest;

const __v4hi = @Vector(4, i16);
var v4_a = __v4hi{ 0, 0, 0, 0 };
var v4_b = __v4hi{ 0, 0, 0, 0 };
var v4_a = __v4hi{ 1, 2, 3, 4 };
var v4_b = __v4hi{ 5, 6, 7, 8 };
_ = .{ &v4_a, &v4_b };
const shuffled: __v4hi = @shuffle(i16, v4_a, v4_b, @Vector(4, i32){
std.zig.c_translation.shuffleVectorIndex(0, @typeInfo(@TypeOf(v4_a)).Vector.len),
std.zig.c_translation.shuffleVectorIndex(0, @typeInfo(@TypeOf(v4_a)).Vector.len),
std.zig.c_translation.shuffleVectorIndex(0, @typeInfo(@TypeOf(v4_a)).Vector.len),
std.zig.c_translation.shuffleVectorIndex(0, @typeInfo(@TypeOf(v4_a)).Vector.len),
std.zig.c_translation.shuffleVectorIndex(2, @typeInfo(@TypeOf(v4_a)).Vector.len),
std.zig.c_translation.shuffleVectorIndex(4, @typeInfo(@TypeOf(v4_a)).Vector.len),
std.zig.c_translation.shuffleVectorIndex(6, @typeInfo(@TypeOf(v4_a)).Vector.len),
});
_ = shuffled;
try expect(shuffled[0] == 1);
try expect(shuffled[1] == 3);
try expect(shuffled[2] == 5);
try expect(shuffled[3] == 7);
}

test "saturating add" {
@@ -1177,10 +1181,22 @@ test "@shlWithOverflow" {
}

test "alignment of vectors" {
try expect(@alignOf(@Vector(2, u8)) == 2);
try expect(@alignOf(@Vector(2, u1)) == 1);
try expect(@alignOf(@Vector(1, u1)) == 1);
try expect(@alignOf(@Vector(2, u16)) == 4);
try expect(@alignOf(@Vector(2, u8)) == switch (builtin.zig_backend) {
else => 2,
.stage2_x86_64 => 16,
});
try expect(@alignOf(@Vector(2, u1)) == switch (builtin.zig_backend) {
else => 1,
.stage2_x86_64 => 16,
});
try expect(@alignOf(@Vector(1, u1)) == switch (builtin.zig_backend) {
else => 1,
.stage2_x86_64 => 16,
});
try expect(@alignOf(@Vector(2, u16)) == switch (builtin.zig_backend) {
else => 4,
.stage2_x86_64 => 16,
});
}

test "loading the second vector from a slice of vectors" {
@@ -1316,10 +1332,10 @@ test "modRem with zero divisor" {

test "array operands to shuffle are coerced to vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;

const mask = [5]i32{ -1, 0, 1, 2, 3 };