compiler: implement better shuffle AIR

Runtime `@shuffle` has two cases which backends generally want to handle
differently for efficiency:

* One runtime vector operand; some result elements may be comptime-known
* Two runtime vector operands; some result elements may be undefined

The latter case happens if both vectors given to `@shuffle` are
runtime-known and they are both used (i.e. the mask refers to them).
Otherwise, if the result is not entirely comptime-known, we are in the
former case. `Sema` now differentiates these two cases in the AIR so that
backends can easily handle them however they want to. Note that this
*doesn't* really involve Sema doing any more work than it would
otherwise need to, so there's not really a negative here!

Most existing backends have their lowerings for `@shuffle` migrated in
this commit. The LLVM backend uses new lowerings suggested by Jacob as
ones which it will handle effectively. The x86_64 backend has not yet
been migrated; for now there's a panic in there. Jacob will implement
that before this is merged anywhere.
This commit is contained in:
mlugg 2025-05-26 05:07:13 +01:00
parent b48d6ff619
commit add2976a9b
No known key found for this signature in database
GPG Key ID: 3F5B7DCCBF4AF02E
18 changed files with 755 additions and 321 deletions

View File

@ -699,9 +699,21 @@ pub const Inst = struct {
/// equal to the scalar value.
/// Uses the `ty_op` field.
splat,
/// Constructs a vector by selecting elements from `a` and `b` based on `mask`.
/// Uses the `ty_pl` field with payload `Shuffle`.
shuffle,
/// Constructs a vector by selecting elements from a single vector based on a mask. Each
/// mask element is either an index into the vector, or a comptime-known value, or "undef".
/// Uses the `ty_pl` field, where the payload index points to:
/// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty`
/// 2. operand: Ref // guaranteed not to be an interned value
/// See `unwrapShuffleOne`.
shuffle_one,
/// Constructs a vector by selecting elements from two vectors based on a mask. Each mask
/// element is either an index into one of the vectors, or "undef".
/// Uses the `ty_pl` field, where the payload index points to:
/// 1. mask_elem: ShuffleTwoMask // for each `mask_len`, which comes from `ty_pl.ty`
/// 2. operand_a: Ref // guaranteed not to be an interned value
/// 3. operand_b: Ref // guaranteed not to be an interned value
/// See `unwrapShuffleTwo`.
shuffle_two,
/// Constructs a vector element-wise from `a` or `b` based on `pred`.
/// Uses the `pl_op` field with `pred` as operand, and payload `Bin`.
select,
@ -1299,13 +1311,6 @@ pub const FieldParentPtr = struct {
field_index: u32,
};
pub const Shuffle = struct {
a: Inst.Ref,
b: Inst.Ref,
mask: InternPool.Index,
mask_len: u32,
};
pub const VectorCmp = struct {
lhs: Inst.Ref,
rhs: Inst.Ref,
@ -1320,6 +1325,64 @@ pub const VectorCmp = struct {
}
};
/// Mask element type for `Inst.Tag.shuffle_one`. Each element either selects a lane of the
/// single runtime-known operand vector, or names a comptime-known value, packed into 32 bits.
pub const ShuffleOneMask = packed struct(u32) {
    /// Payload. When `kind == .elem` this is an index into the runtime vector; when
    /// `kind == .value` it is the integer form of an `InternPool.Index`.
    index: u31,
    /// Selects how `index` is interpreted.
    kind: enum(u1) { elem, value },

    /// Encodes "take element `idx` of the runtime operand".
    /// `idx` is narrowed with `@intCast`, so it must fit in 31 bits.
    pub fn elem(idx: u32) ShuffleOneMask {
        const payload: u31 = @intCast(idx);
        return .{ .index = payload, .kind = .elem };
    }

    /// Encodes "use the comptime-known value `val`".
    /// The interned index of `val` is narrowed with `@intCast`, so it must fit in 31 bits.
    pub fn value(val: Value) ShuffleOneMask {
        const interned = @intFromEnum(val.toIntern());
        const payload: u31 = @intCast(interned);
        return .{ .index = payload, .kind = .value };
    }

    /// Decoded form of a `ShuffleOneMask`, convenient for `switch`.
    pub const Unwrapped = union(enum) {
        /// The resulting element is this index into the runtime vector.
        elem: u32,
        /// The resulting element is this comptime-known value.
        /// It is correctly typed. It might be `undefined`.
        value: InternPool.Index,
    };

    /// Decodes the packed representation into its tagged-union form.
    pub fn unwrap(raw: ShuffleOneMask) Unwrapped {
        switch (raw.kind) {
            .elem => return .{ .elem = raw.index },
            .value => return .{ .value = @enumFromInt(raw.index) },
        }
    }
};
/// Mask element type for `Inst.Tag.shuffle_two`. Each element either selects a lane from one
/// of the two runtime-known operand vectors, or is undefined.
///
/// Encoding: the lowest bit selects the operand (0 = first, 1 = second) and the remaining
/// bits hold the element index. The all-ones value is reserved for `undef`.
pub const ShuffleTwoMask = enum(u32) {
    undef = std.math.maxInt(u32),
    _,

    /// Encodes "take element `idx` of the first operand".
    /// The top bit of `idx` is shifted out, so only indices that fit in 31 bits round-trip.
    pub fn aElem(idx: u32) ShuffleTwoMask {
        const shifted = idx << 1;
        return @enumFromInt(shifted);
    }

    /// Encodes "take element `idx` of the second operand".
    /// The top bit of `idx` is shifted out, so only indices that fit in 31 bits round-trip.
    /// NOTE(review): `idx == maxInt(u31)` produces the same raw value as `.undef`; callers
    /// presumably never use such an index for a real element -- confirm against callers.
    pub fn bElem(idx: u32) ShuffleTwoMask {
        const shifted = idx << 1;
        return @enumFromInt(shifted | 1);
    }

    /// Decoded form of a `ShuffleTwoMask`, convenient for `switch`.
    pub const Unwrapped = union(enum) {
        /// The resulting element is this index into the first runtime vector.
        a_elem: u32,
        /// The resulting element is this index into the second runtime vector.
        b_elem: u32,
        /// The resulting element is `undefined`.
        undef,
    };

    /// Decodes the raw value into its tagged-union form.
    pub fn unwrap(raw: ShuffleTwoMask) Unwrapped {
        // The reserved all-ones value must be recognised before looking at the selector bit.
        if (raw == .undef) return .undef;
        const bits = @intFromEnum(raw);
        const idx = bits >> 1;
        return if ((bits & 1) == 0) .{ .a_elem = idx } else .{ .b_elem = idx };
    }
};
/// Trailing:
/// 0. `Inst.Ref` for every outputs_len
/// 1. `Inst.Ref` for every inputs_len
@ -1503,7 +1566,6 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
.cmpxchg_weak,
.cmpxchg_strong,
.slice,
.shuffle,
.aggregate_init,
.union_init,
.field_parent_ptr,
@ -1517,6 +1579,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
.ptr_sub,
.try_ptr,
.try_ptr_cold,
.shuffle_one,
.shuffle_two,
=> return datas[@intFromEnum(inst)].ty_pl.ty.toType(),
.not,
@ -1903,7 +1967,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
.reduce,
.reduce_optimized,
.splat,
.shuffle,
.shuffle_one,
.shuffle_two,
.select,
.is_named_enum_value,
.tag_name,
@ -2030,6 +2095,48 @@ pub fn unwrapSwitch(air: *const Air, switch_inst: Inst.Index) UnwrappedSwitch {
};
}
/// Decodes the payload of a `shuffle_one` instruction.
///
/// Asserts that `inst_index` refers to a `shuffle_one` instruction. The result type is taken
/// from `ty_pl.ty`, and its vector length gives the number of mask elements. The payload in
/// `air.extra` is laid out as that many mask elements followed by the single operand ref.
///
/// The returned `mask` slice is a reinterpreted view of `air.extra.items`, not a copy.
pub fn unwrapShuffleOne(air: *const Air, zcu: *const Zcu, inst_index: Inst.Index) struct {
    result_ty: Type,
    operand: Inst.Ref,
    mask: []const ShuffleOneMask,
} {
    const inst = air.instructions.get(@intFromEnum(inst_index));
    if (inst.tag != .shuffle_one) unreachable; // assertion failure

    const ty_pl = inst.data.ty_pl;
    const result_ty: Type = .fromInterned(ty_pl.ty.toInterned().?);
    const mask_len: u32 = result_ty.vectorLen(zcu);

    // Everything from the payload index onwards: `mask_len` mask words, then the operand.
    const trailing = air.extra.items[ty_pl.payload..];
    return .{
        .result_ty = result_ty,
        .operand = @enumFromInt(trailing[mask_len]),
        .mask = @ptrCast(trailing[0..mask_len]),
    };
}
/// Decodes the payload of a `shuffle_two` instruction.
///
/// Asserts that `inst_index` refers to a `shuffle_two` instruction. The result type is taken
/// from `ty_pl.ty`, and its vector length gives the number of mask elements. The payload in
/// `air.extra` is laid out as that many mask elements followed by the two operand refs
/// (`operand_a`, then `operand_b`).
///
/// The returned `mask` slice is a reinterpreted view of `air.extra.items`, not a copy.
pub fn unwrapShuffleTwo(air: *const Air, zcu: *const Zcu, inst_index: Inst.Index) struct {
    result_ty: Type,
    operand_a: Inst.Ref,
    operand_b: Inst.Ref,
    mask: []const ShuffleTwoMask,
} {
    const inst = air.instructions.get(@intFromEnum(inst_index));
    if (inst.tag != .shuffle_two) unreachable; // assertion failure

    const ty_pl = inst.data.ty_pl;
    const result_ty: Type = .fromInterned(ty_pl.ty.toInterned().?);
    const mask_len: u32 = result_ty.vectorLen(zcu);

    // Everything from the payload index onwards: `mask_len` mask words, then both operands.
    const trailing = air.extra.items[ty_pl.payload..];
    return .{
        .result_ty = result_ty,
        .operand_a = @enumFromInt(trailing[mask_len + 0]),
        .operand_b = @enumFromInt(trailing[mask_len + 1]),
        .mask = @ptrCast(trailing[0..mask_len]),
    };
}
pub const typesFullyResolved = types_resolved.typesFullyResolved;
pub const typeFullyResolved = types_resolved.checkType;
pub const valFullyResolved = types_resolved.checkVal;

View File

@ -521,7 +521,8 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
}
},
.splat,
.shuffle,
.shuffle_one,
.shuffle_two,
=> {},
.select,
=> if (l.features.contains(.scalarize_select)) continue :inst try l.scalarize(inst, .select_pl_op_bin),

View File

@ -15,6 +15,7 @@ const Liveness = @This();
const trace = @import("../tracy.zig").trace;
const Air = @import("../Air.zig");
const InternPool = @import("../InternPool.zig");
const Zcu = @import("../Zcu.zig");
pub const Verify = @import("Liveness/Verify.zig");
@ -136,12 +137,15 @@ fn LivenessPassData(comptime pass: LivenessPass) type {
};
}
pub fn analyze(gpa: Allocator, air: Air, intern_pool: *InternPool) Allocator.Error!Liveness {
pub fn analyze(zcu: *Zcu, air: Air, intern_pool: *InternPool) Allocator.Error!Liveness {
const tracy = trace(@src());
defer tracy.end();
const gpa = zcu.gpa;
var a: Analysis = .{
.gpa = gpa,
.zcu = zcu,
.air = air,
.tomb_bits = try gpa.alloc(
usize,
@ -220,6 +224,7 @@ const OperandCategory = enum {
pub fn categorizeOperand(
l: Liveness,
air: Air,
zcu: *Zcu,
inst: Air.Inst.Index,
operand: Air.Inst.Index,
ip: *const InternPool,
@ -511,10 +516,15 @@ pub fn categorizeOperand(
if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 2, .none);
return .none;
},
.shuffle => {
const extra = air.extraData(Air.Shuffle, air_datas[@intFromEnum(inst)].ty_pl.payload).data;
if (extra.a == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
.shuffle_one => {
const unwrapped = air.unwrapShuffleOne(zcu, inst);
if (unwrapped.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
return .none;
},
.shuffle_two => {
const unwrapped = air.unwrapShuffleTwo(zcu, inst);
if (unwrapped.operand_a == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
if (unwrapped.operand_b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
return .none;
},
.reduce, .reduce_optimized => {
@ -639,7 +649,7 @@ pub fn categorizeOperand(
var operand_live: bool = true;
for (&[_]Air.Inst.Index{ then_body[0], else_body[0] }) |cond_inst| {
if (l.categorizeOperand(air, cond_inst, operand, ip) == .tomb)
if (l.categorizeOperand(air, zcu, cond_inst, operand, ip) == .tomb)
operand_live = false;
switch (air_tags[@intFromEnum(cond_inst)]) {
@ -824,6 +834,7 @@ pub const BigTomb = struct {
/// In-progress data; on successful analysis converted into `Liveness`.
const Analysis = struct {
gpa: Allocator,
zcu: *Zcu,
air: Air,
intern_pool: *InternPool,
tomb_bits: []usize,
@ -1119,9 +1130,13 @@ fn analyzeInst(
const extra = a.air.extraData(Air.Bin, pl_op.payload).data;
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, extra.lhs, extra.rhs });
},
.shuffle => {
const extra = a.air.extraData(Air.Shuffle, inst_datas[@intFromEnum(inst)].ty_pl.payload).data;
return analyzeOperands(a, pass, data, inst, .{ extra.a, extra.b, .none });
.shuffle_one => {
const unwrapped = a.air.unwrapShuffleOne(a.zcu, inst);
return analyzeOperands(a, pass, data, inst, .{ unwrapped.operand, .none, .none });
},
.shuffle_two => {
const unwrapped = a.air.unwrapShuffleTwo(a.zcu, inst);
return analyzeOperands(a, pass, data, inst, .{ unwrapped.operand_a, unwrapped.operand_b, .none });
},
.reduce, .reduce_optimized => {
const reduce = inst_datas[@intFromEnum(inst)].reduce;

View File

@ -1,6 +1,7 @@
//! Verifies that Liveness information is valid.
gpa: std.mem.Allocator,
zcu: *Zcu,
air: Air,
liveness: Liveness,
live: LiveMap = .{},
@ -287,10 +288,13 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
try self.verifyInstOperands(inst, .{ extra.lhs, extra.rhs, .none });
},
.shuffle => {
const ty_pl = data[@intFromEnum(inst)].ty_pl;
const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
try self.verifyInstOperands(inst, .{ extra.a, extra.b, .none });
.shuffle_one => {
const unwrapped = self.air.unwrapShuffleOne(self.zcu, inst);
try self.verifyInstOperands(inst, .{ unwrapped.operand, .none, .none });
},
.shuffle_two => {
const unwrapped = self.air.unwrapShuffleTwo(self.zcu, inst);
try self.verifyInstOperands(inst, .{ unwrapped.operand_a, unwrapped.operand_b, .none });
},
.cmp_vector,
.cmp_vector_optimized,
@ -639,4 +643,5 @@ const log = std.log.scoped(.liveness_verify);
const Air = @import("../../Air.zig");
const Liveness = @import("../Liveness.zig");
const InternPool = @import("../../InternPool.zig");
const Zcu = @import("../../Zcu.zig");
const Verify = @This();

View File

@ -249,12 +249,22 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
if (!checkRef(extra.struct_operand, zcu)) return false;
},
.shuffle => {
const extra = air.extraData(Air.Shuffle, data.ty_pl.payload).data;
if (!checkType(data.ty_pl.ty.toType(), zcu)) return false;
if (!checkRef(extra.a, zcu)) return false;
if (!checkRef(extra.b, zcu)) return false;
if (!checkVal(Value.fromInterned(extra.mask), zcu)) return false;
.shuffle_one => {
const unwrapped = air.unwrapShuffleOne(zcu, inst);
if (!checkType(unwrapped.result_ty, zcu)) return false;
if (!checkRef(unwrapped.operand, zcu)) return false;
for (unwrapped.mask) |m| switch (m.unwrap()) {
.elem => {},
.value => |val| if (!checkVal(.fromInterned(val), zcu)) return false,
};
},
.shuffle_two => {
const unwrapped = air.unwrapShuffleTwo(zcu, inst);
if (!checkType(unwrapped.result_ty, zcu)) return false;
if (!checkRef(unwrapped.operand_a, zcu)) return false;
if (!checkRef(unwrapped.operand_b, zcu)) return false;
// No values to check because there are no comptime-known values other than undef
},
.cmpxchg_weak,

View File

@ -24256,8 +24256,8 @@ fn analyzeShuffle(
block: *Block,
src_node: std.zig.Ast.Node.Offset,
elem_ty: Type,
a_arg: Air.Inst.Ref,
b_arg: Air.Inst.Ref,
a_uncoerced: Air.Inst.Ref,
b_uncoerced: Air.Inst.Ref,
mask: Value,
mask_len: u32,
) CompileError!Air.Inst.Ref {
@ -24266,150 +24266,154 @@ fn analyzeShuffle(
const a_src = block.builtinCallArgSrc(src_node, 1);
const b_src = block.builtinCallArgSrc(src_node, 2);
const mask_src = block.builtinCallArgSrc(src_node, 3);
var a = a_arg;
var b = b_arg;
const res_ty = try pt.vectorType(.{
.len = mask_len,
.child = elem_ty.toIntern(),
});
const maybe_a_len = switch (sema.typeOf(a).zigTypeTag(zcu)) {
.array, .vector => sema.typeOf(a).arrayLen(zcu),
.undefined => null,
else => return sema.fail(block, a_src, "expected vector or array with element type '{}', found '{}'", .{
elem_ty.fmt(pt),
sema.typeOf(a).fmt(pt),
}),
// If the type of `a` is `@Type(.undefined)`, i.e. the argument is untyped, this is 0, because it is an error to index into this vector.
const a_len: u32 = switch (sema.typeOf(a_uncoerced).zigTypeTag(zcu)) {
.array, .vector => @intCast(sema.typeOf(a_uncoerced).arrayLen(zcu)),
.undefined => 0,
else => return sema.fail(block, a_src, "expected vector of '{}', found '{}'", .{ elem_ty.fmt(pt), sema.typeOf(a_uncoerced).fmt(pt) }),
};
const maybe_b_len = switch (sema.typeOf(b).zigTypeTag(zcu)) {
.array, .vector => sema.typeOf(b).arrayLen(zcu),
.undefined => null,
else => return sema.fail(block, b_src, "expected vector or array with element type '{}', found '{}'", .{
elem_ty.fmt(pt),
sema.typeOf(b).fmt(pt),
}),
const a_ty = try pt.vectorType(.{ .len = a_len, .child = elem_ty.toIntern() });
const a_coerced = try sema.coerce(block, a_ty, a_uncoerced, a_src);
// If the type of `b` is `@Type(.undefined)`, i.e. the argument is untyped, this is 0, because it is an error to index into this vector.
const b_len: u32 = switch (sema.typeOf(b_uncoerced).zigTypeTag(zcu)) {
.array, .vector => @intCast(sema.typeOf(b_uncoerced).arrayLen(zcu)),
.undefined => 0,
else => return sema.fail(block, b_src, "expected vector of '{}', found '{}'", .{ elem_ty.fmt(pt), sema.typeOf(b_uncoerced).fmt(pt) }),
};
if (maybe_a_len == null and maybe_b_len == null) {
return pt.undefRef(res_ty);
}
const a_len: u32 = @intCast(maybe_a_len orelse maybe_b_len.?);
const b_len: u32 = @intCast(maybe_b_len orelse a_len);
const b_ty = try pt.vectorType(.{ .len = b_len, .child = elem_ty.toIntern() });
const b_coerced = try sema.coerce(block, b_ty, b_uncoerced, b_src);
const a_ty = try pt.vectorType(.{
.len = a_len,
.child = elem_ty.toIntern(),
});
const b_ty = try pt.vectorType(.{
.len = b_len,
.child = elem_ty.toIntern(),
});
const result_ty = try pt.vectorType(.{ .len = mask_len, .child = elem_ty.toIntern() });
if (maybe_a_len == null) a = try pt.undefRef(a_ty) else a = try sema.coerce(block, a_ty, a, a_src);
if (maybe_b_len == null) b = try pt.undefRef(b_ty) else b = try sema.coerce(block, b_ty, b, b_src);
// We're going to pre-emptively reserve space in `sema.air_extra`. The reason for this is we need
// a `u32` buffer of length `mask_len` anyway, and putting it in `sema.air_extra` avoids a copy
// in the runtime case. If the result is comptime-known, we'll shrink `air_extra` back.
const air_extra_idx: u32 = @intCast(sema.air_extra.items.len);
const air_mask_buf = try sema.air_extra.addManyAsSlice(sema.gpa, mask_len);
const operand_info = [2]std.meta.Tuple(&.{ u64, LazySrcLoc, Type }){
.{ a_len, a_src, a_ty },
.{ b_len, b_src, b_ty },
};
// We want to interpret that buffer in `air_extra` in a few ways. Initially, we'll consider its
// elements as `Air.ShuffleTwoMask`, essentially representing the raw mask values; then, we'll
// convert it to `InternPool.Index` or `Air.ShuffleOneMask` if there are comptime-known operands.
const mask_ip_index: []InternPool.Index = @ptrCast(air_mask_buf);
const mask_shuffle_one: []Air.ShuffleOneMask = @ptrCast(air_mask_buf);
const mask_shuffle_two: []Air.ShuffleTwoMask = @ptrCast(air_mask_buf);
for (0..@intCast(mask_len)) |i| {
const elem = try mask.elemValue(pt, i);
if (elem.isUndef(zcu)) continue;
const elem_resolved = try sema.resolveLazyValue(elem);
const int = elem_resolved.toSignedInt(zcu);
var unsigned: u32 = undefined;
var chosen: u32 = undefined;
if (int >= 0) {
unsigned = @intCast(int);
chosen = 0;
} else {
unsigned = @intCast(~int);
chosen = 1;
}
if (unsigned >= operand_info[chosen][0]) {
const msg = msg: {
const msg = try sema.errMsg(mask_src, "mask index '{d}' has out-of-bounds selection", .{i});
errdefer msg.destroy(sema.gpa);
try sema.errNote(operand_info[chosen][1], msg, "selected index '{d}' out of bounds of '{}'", .{
unsigned,
operand_info[chosen][2].fmt(pt),
});
if (chosen == 0) {
try sema.errNote(b_src, msg, "selections from the second vector are specified with negative numbers", .{});
}
break :msg msg;
};
return sema.failWithOwnedErrorMsg(block, msg);
}
}
if (try sema.resolveValue(a)) |a_val| {
if (try sema.resolveValue(b)) |b_val| {
const values = try sema.arena.alloc(InternPool.Index, mask_len);
for (values, 0..) |*value, i| {
const mask_elem_val = try mask.elemValue(pt, i);
if (mask_elem_val.isUndef(zcu)) {
value.* = try pt.intern(.{ .undef = elem_ty.toIntern() });
// Initial loop: check mask elements, populate `mask_shuffle_two`.
var a_used = false;
var b_used = false;
for (mask_shuffle_two, 0..mask_len) |*out, mask_idx| {
const mask_val = try mask.elemValue(pt, mask_idx);
if (mask_val.isUndef(zcu)) {
out.* = .undef;
continue;
}
const int = mask_elem_val.toSignedInt(zcu);
const unsigned: u32 = @intCast(if (int >= 0) int else ~int);
values[i] = (try (if (int >= 0) a_val else b_val).elemValue(pt, unsigned)).toIntern();
// Safe because mask elements are `i32` and we already checked for undef:
const raw = (try sema.resolveLazyValue(mask_val)).toSignedInt(zcu);
if (raw >= 0) {
const idx: u32 = @intCast(raw);
a_used = true;
out.* = .aElem(idx);
if (idx >= a_len) return sema.failWithOwnedErrorMsg(block, msg: {
const msg = try sema.errMsg(mask_src, "mask element at index '{d}' selects out-of-bounds index", .{mask_idx});
errdefer msg.destroy(sema.gpa);
try sema.errNote(a_src, msg, "index '{d}' exceeds bounds of '{}' given here", .{ idx, a_ty.fmt(pt) });
if (idx < b_len) {
try sema.errNote(b_src, msg, "use '~@as(u32, {d})' to index into second vector given here", .{idx});
}
return Air.internedToRef((try pt.intern(.{ .aggregate = .{
.ty = res_ty.toIntern(),
.storage = .{ .elems = values },
} })));
}
}
// All static analysis passed, and not comptime.
// For runtime codegen, vectors a and b must be the same length. Here we
// recursively @shuffle the smaller vector to append undefined elements
// to it up to the length of the longer vector. This recursion terminates
// in 1 call because these calls to analyzeShuffle guarantee a_len == b_len.
if (a_len != b_len) {
const min_len = @min(a_len, b_len);
const max_src = if (a_len > b_len) a_src else b_src;
const max_len = try sema.usizeCast(block, max_src, @max(a_len, b_len));
const expand_mask_values = try sema.arena.alloc(InternPool.Index, max_len);
for (@intCast(0)..@intCast(min_len)) |i| {
expand_mask_values[i] = (try pt.intValue(.comptime_int, i)).toIntern();
}
for (@intCast(min_len)..@intCast(max_len)) |i| {
expand_mask_values[i] = .negative_one;
}
const expand_mask = try pt.intern(.{ .aggregate = .{
.ty = (try pt.vectorType(.{ .len = @intCast(max_len), .child = .comptime_int_type })).toIntern(),
.storage = .{ .elems = expand_mask_values },
} });
if (a_len < b_len) {
const undef = try pt.undefRef(a_ty);
a = try sema.analyzeShuffle(block, src_node, elem_ty, a, undef, Value.fromInterned(expand_mask), @intCast(max_len));
break :msg msg;
});
} else {
const undef = try pt.undefRef(b_ty);
b = try sema.analyzeShuffle(block, src_node, elem_ty, b, undef, Value.fromInterned(expand_mask), @intCast(max_len));
const idx: u32 = @intCast(~raw);
b_used = true;
out.* = .bElem(idx);
if (idx >= b_len) return sema.failWithOwnedErrorMsg(block, msg: {
const msg = try sema.errMsg(mask_src, "mask element at index '{d}' selects out-of-bounds index", .{mask_idx});
errdefer msg.destroy(sema.gpa);
try sema.errNote(b_src, msg, "index '{d}' exceeds bounds of '{}' given here", .{ idx, b_ty.fmt(pt) });
break :msg msg;
});
}
}
const maybe_a_val = try sema.resolveValue(a_coerced);
const maybe_b_val = try sema.resolveValue(b_coerced);
const a_rt = a_used and maybe_a_val == null;
const b_rt = b_used and maybe_b_val == null;
if (a_rt and b_rt) {
// Both operands are needed and runtime-known. We need a `[]ShuffleTwoMask`... which is
// exactly what we already have in `mask_shuffle_two`! So, we're basically done already.
// We just need to append the two operands.
try sema.air_extra.ensureUnusedCapacity(sema.gpa, 2);
sema.appendRefsAssumeCapacity(&.{ a_coerced, b_coerced });
return block.addInst(.{
.tag = .shuffle,
.tag = .shuffle_two,
.data = .{ .ty_pl = .{
.ty = Air.internedToRef(res_ty.toIntern()),
.payload = try block.sema.addExtra(Air.Shuffle{
.a = a,
.b = b,
.mask = mask.toIntern(),
.mask_len = mask_len,
}),
.ty = Air.internedToRef(result_ty.toIntern()),
.payload = air_extra_idx,
} },
});
} else if (a_rt) {
// We need to convert the `ShuffleTwoMask` values to `ShuffleOneMask`.
for (mask_shuffle_two, mask_shuffle_one) |in, *out| {
out.* = switch (in.unwrap()) {
.undef => .value(try pt.undefValue(elem_ty)),
.a_elem => |idx| .elem(idx),
.b_elem => |idx| .value(try maybe_b_val.?.elemValue(pt, idx)),
};
}
// Now just append our single runtime operand, and we're done.
try sema.air_extra.ensureUnusedCapacity(sema.gpa, 1);
sema.appendRefsAssumeCapacity(&.{a_coerced});
return block.addInst(.{
.tag = .shuffle_one,
.data = .{ .ty_pl = .{
.ty = Air.internedToRef(result_ty.toIntern()),
.payload = air_extra_idx,
} },
});
} else if (b_rt) {
// We need to convert the `ShuffleTwoMask` values to `ShuffleOneMask`.
for (mask_shuffle_two, mask_shuffle_one) |in, *out| {
out.* = switch (in.unwrap()) {
.undef => .value(try pt.undefValue(elem_ty)),
.a_elem => |idx| .value(try maybe_a_val.?.elemValue(pt, idx)),
.b_elem => |idx| .elem(idx),
};
}
// Now just append our single runtime operand, and we're done.
try sema.air_extra.ensureUnusedCapacity(sema.gpa, 1);
sema.appendRefsAssumeCapacity(&.{b_coerced});
return block.addInst(.{
.tag = .shuffle_one,
.data = .{ .ty_pl = .{
.ty = Air.internedToRef(result_ty.toIntern()),
.payload = air_extra_idx,
} },
});
} else {
// The result will be comptime-known. We must convert the `ShuffleTwoMask` values to
// `InternPool.Index` values using the known operands.
for (mask_shuffle_two, mask_ip_index) |in, *out| {
const val: Value = switch (in.unwrap()) {
.undef => try pt.undefValue(elem_ty),
.a_elem => |idx| try maybe_a_val.?.elemValue(pt, idx),
.b_elem => |idx| try maybe_b_val.?.elemValue(pt, idx),
};
out.* = val.toIntern();
}
const res = try pt.intern(.{ .aggregate = .{
.ty = result_ty.toIntern(),
.storage = .{ .elems = mask_ip_index },
} });
// We have a comptime-known result, so didn't need `air_mask_buf` -- remove it from `sema.air_extra`.
assert(sema.air_extra.items.len == air_extra_idx + air_mask_buf.len);
sema.air_extra.shrinkRetainingCapacity(air_extra_idx);
return Air.internedToRef(res);
}
}
fn zirSelect(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!Air.Inst.Ref {

View File

@ -1745,7 +1745,7 @@ pub fn linkerUpdateFunc(pt: Zcu.PerThread, func_index: InternPool.Index, air: *A
try air.legalize(pt, @import("../codegen.zig").legalizeFeatures(pt, nav_index) orelse break :legalize);
}
var liveness = try Air.Liveness.analyze(gpa, air.*, ip);
var liveness = try Air.Liveness.analyze(zcu, air.*, ip);
defer liveness.deinit(gpa);
if (build_options.enable_debug_extensions and comp.verbose_air) {
@ -1757,6 +1757,7 @@ pub fn linkerUpdateFunc(pt: Zcu.PerThread, func_index: InternPool.Index, air: *A
if (std.debug.runtime_safety) {
var verify: Air.Liveness.Verify = .{
.gpa = gpa,
.zcu = zcu,
.air = air.*,
.liveness = liveness,
.intern_pool = ip,

View File

@ -778,7 +778,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.select => try self.airSelect(inst),
.shuffle => try self.airShuffle(inst),
.shuffle_one => try self.airShuffleOne(inst),
.shuffle_two => try self.airShuffleTwo(inst),
.reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
@ -6049,11 +6050,14 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) InnerError!void {
return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
}
fn airShuffle(self: *Self, inst: Air.Inst.Index) InnerError!void {
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airShuffle for {}", .{self.target.cpu.arch});
return self.finishAir(inst, result, .{ extra.a, extra.b, .none });
/// Lowering for the `shuffle_one` AIR instruction.
/// Not implemented yet: `inst` is discarded and a TODO failure naming the target CPU
/// architecture is always reported through `self.fail`.
fn airShuffleOne(self: *Self, inst: Air.Inst.Index) InnerError!void {
_ = inst;
return self.fail("TODO implement airShuffleOne for {}", .{self.target.cpu.arch});
}
/// Lowering for the `shuffle_two` AIR instruction.
/// Not implemented yet: `inst` is discarded and a TODO failure naming the target CPU
/// architecture is always reported through `self.fail`.
fn airShuffleTwo(self: *Self, inst: Air.Inst.Index) InnerError!void {
_ = inst;
return self.fail("TODO implement airShuffleTwo for {}", .{self.target.cpu.arch});
}
fn airReduce(self: *Self, inst: Air.Inst.Index) InnerError!void {

View File

@ -767,7 +767,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.select => try self.airSelect(inst),
.shuffle => try self.airShuffle(inst),
.shuffle_one => try self.airShuffleOne(inst),
.shuffle_two => try self.airShuffleTwo(inst),
.reduce => try self.airReduce(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
@ -6021,10 +6022,14 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
}
fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airShuffle for arm", .{});
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
/// Lowering for the `shuffle_one` AIR instruction.
/// Not implemented yet: `inst` is discarded and a TODO failure is always reported
/// through `self.fail`.
fn airShuffleOne(self: *Self, inst: Air.Inst.Index) !void {
_ = inst;
return self.fail("TODO implement airShuffleOne for arm", .{});
}
/// Lowering for the `shuffle_two` AIR instruction.
/// Not implemented yet: `inst` is discarded and a TODO failure is always reported
/// through `self.fail`.
fn airShuffleTwo(self: *Self, inst: Air.Inst.Index) !void {
_ = inst;
return self.fail("TODO implement airShuffleTwo for arm", .{});
}
fn airReduce(self: *Self, inst: Air.Inst.Index) !void {

View File

@ -1586,7 +1586,8 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void {
.error_name => try func.airErrorName(inst),
.splat => try func.airSplat(inst),
.select => try func.airSelect(inst),
.shuffle => try func.airShuffle(inst),
.shuffle_one => try func.airShuffleOne(inst),
.shuffle_two => try func.airShuffleTwo(inst),
.reduce => try func.airReduce(inst),
.aggregate_init => try func.airAggregateInit(inst),
.union_init => try func.airUnionInit(inst),
@ -8030,10 +8031,14 @@ fn airSelect(func: *Func, inst: Air.Inst.Index) !void {
return func.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
}
fn airShuffle(func: *Func, inst: Air.Inst.Index) !void {
const ty_op = func.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
const result: MCValue = if (func.liveness.isUnused(inst)) .unreach else return func.fail("TODO implement airShuffle for riscv64", .{});
return func.finishAir(inst, result, .{ ty_op.operand, .none, .none });
/// Lowering for the `shuffle_one` AIR instruction.
/// Not implemented yet: `inst` is discarded and a TODO failure is always reported
/// through `func.fail`.
fn airShuffleOne(func: *Func, inst: Air.Inst.Index) !void {
_ = inst;
return func.fail("TODO implement airShuffleOne for riscv64", .{});
}
/// Lowering for the `shuffle_two` AIR instruction.
/// Not implemented yet: `inst` is discarded and a TODO failure is always reported
/// through `func.fail`.
fn airShuffleTwo(func: *Func, inst: Air.Inst.Index) !void {
_ = inst;
return func.fail("TODO implement airShuffleTwo for riscv64", .{});
}
fn airReduce(func: *Func, inst: Air.Inst.Index) !void {

View File

@ -621,7 +621,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.select => @panic("TODO try self.airSelect(inst)"),
.shuffle => @panic("TODO try self.airShuffle(inst)"),
.shuffle_one => @panic("TODO try self.airShuffleOne(inst)"),
.shuffle_two => @panic("TODO try self.airShuffleTwo(inst)"),
.reduce => @panic("TODO try self.airReduce(inst)"),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),

View File

@ -2004,7 +2004,8 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
.ret_load => cg.airRetLoad(inst),
.splat => cg.airSplat(inst),
.select => cg.airSelect(inst),
.shuffle => cg.airShuffle(inst),
.shuffle_one => cg.airShuffleOne(inst),
.shuffle_two => cg.airShuffleTwo(inst),
.reduce => cg.airReduce(inst),
.aggregate_init => cg.airAggregateInit(inst),
.union_init => cg.airUnionInit(inst),
@ -5177,66 +5178,100 @@ fn airSelect(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
return cg.fail("TODO: Implement wasm airSelect", .{});
}
fn airShuffle(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
fn airShuffleOne(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
const pt = cg.pt;
const zcu = pt.zcu;
const inst_ty = cg.typeOfIndex(inst);
const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = cg.air.extraData(Air.Shuffle, ty_pl.payload).data;
const a = try cg.resolveInst(extra.a);
const b = try cg.resolveInst(extra.b);
const mask = Value.fromInterned(extra.mask);
const mask_len = extra.mask_len;
const unwrapped = cg.air.unwrapShuffleOne(zcu, inst);
const result_ty = unwrapped.result_ty;
const mask = unwrapped.mask;
const operand = try cg.resolveInst(unwrapped.operand);
const child_ty = inst_ty.childType(zcu);
const elem_size = child_ty.abiSize(zcu);
const elem_ty = result_ty.childType(zcu);
const elem_size = elem_ty.abiSize(zcu);
// TODO: One of them could be by ref; handle in loop
if (isByRef(cg.typeOf(extra.a), zcu, cg.target) or isByRef(inst_ty, zcu, cg.target)) {
const result = try cg.allocStack(inst_ty);
// TODO: this function could have an `i8x16_shuffle` fast path like `airShuffleTwo` if we were
// to lower the comptime-known operands to a non-by-ref vector value.
for (0..mask_len) |index| {
const value = (try mask.elemValue(pt, index)).toSignedInt(zcu);
// TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible.
// I tried to fix it, but I couldn't make much sense of how this backend handles memory.
try cg.emitWValue(result);
const loaded = if (value >= 0)
try cg.load(a, child_ty, @as(u32, @intCast(@as(i64, @intCast(elem_size)) * value)))
else
try cg.load(b, child_ty, @as(u32, @intCast(@as(i64, @intCast(elem_size)) * ~value)));
try cg.store(.stack, loaded, child_ty, result.stack_offset.value + @as(u32, @intCast(elem_size)) * @as(u32, @intCast(index)));
const dest_alloc = try cg.allocStack(result_ty);
for (mask, 0..) |mask_elem, out_idx| {
try cg.emitWValue(dest_alloc);
const elem_val = switch (mask_elem.unwrap()) {
.elem => |idx| try cg.load(operand, elem_ty, @intCast(elem_size * idx)),
.value => |val| try cg.lowerConstant(.fromInterned(val), elem_ty),
};
try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx));
}
return cg.finishAir(inst, dest_alloc, &.{unwrapped.operand});
}
return cg.finishAir(inst, result, &.{ extra.a, extra.b });
} else {
var operands = [_]u32{
@intFromEnum(std.wasm.SimdOpcode.i8x16_shuffle),
} ++ [1]u32{undefined} ** 4;
fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
const pt = cg.pt;
const zcu = pt.zcu;
var lanes = mem.asBytes(operands[1..]);
for (0..@as(usize, @intCast(mask_len))) |index| {
const mask_elem = (try mask.elemValue(pt, index)).toSignedInt(zcu);
const base_index = if (mask_elem >= 0)
@as(u8, @intCast(@as(i64, @intCast(elem_size)) * mask_elem))
else
16 + @as(u8, @intCast(@as(i64, @intCast(elem_size)) * ~mask_elem));
const unwrapped = cg.air.unwrapShuffleTwo(zcu, inst);
const result_ty = unwrapped.result_ty;
const mask = unwrapped.mask;
const operand_a = try cg.resolveInst(unwrapped.operand_a);
const operand_b = try cg.resolveInst(unwrapped.operand_b);
for (0..@as(usize, @intCast(elem_size))) |byte_offset| {
lanes[index * @as(usize, @intCast(elem_size)) + byte_offset] = base_index + @as(u8, @intCast(byte_offset));
const a_ty = cg.typeOf(unwrapped.operand_a);
const b_ty = cg.typeOf(unwrapped.operand_b);
const elem_ty = result_ty.childType(zcu);
const elem_size = elem_ty.abiSize(zcu);
// WASM has `i8x16_shuffle`, which we can apply if the element type bit size is a multiple of 8
// and the input and output vectors have a bit size of 128 (and are hence not by-ref). Otherwise,
// we fall back to a naive loop lowering.
if (!isByRef(a_ty, zcu, cg.target) and
!isByRef(b_ty, zcu, cg.target) and
!isByRef(result_ty, zcu, cg.target) and
elem_ty.bitSize(zcu) % 8 == 0)
{
var lane_map: [16]u8 align(4) = undefined;
const lanes_per_elem = elem_ty.bitSize(zcu) / 8;
for (mask, 0..) |mask_elem, out_idx| {
const out_first_lane = out_idx * lanes_per_elem;
const in_first_lane = switch (mask_elem.unwrap()) {
.a_elem => |i| i * lanes_per_elem,
.b_elem => |i| i * lanes_per_elem + 16,
.undef => 0, // doesn't matter
};
for (lane_map[out_first_lane..][0..lanes_per_elem], in_first_lane..) |*out, in| {
out.* = @intCast(in);
}
}
try cg.emitWValue(a);
try cg.emitWValue(b);
try cg.emitWValue(operand_a);
try cg.emitWValue(operand_b);
const extra_index = cg.extraLen();
try cg.mir_extra.appendSlice(cg.gpa, &operands);
try cg.mir_extra.appendSlice(cg.gpa, &.{
@intFromEnum(std.wasm.SimdOpcode.i8x16_shuffle),
@bitCast(lane_map[0..4].*),
@bitCast(lane_map[4..8].*),
@bitCast(lane_map[8..12].*),
@bitCast(lane_map[12..].*),
});
try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } });
return cg.finishAir(inst, .stack, &.{ extra.a, extra.b });
return cg.finishAir(inst, .stack, &.{ unwrapped.operand_a, unwrapped.operand_b });
}
// TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible.
// I tried to fix it, but I couldn't make much sense of how this backend handles memory.
const dest_alloc = try cg.allocStack(result_ty);
for (mask, 0..) |mask_elem, out_idx| {
try cg.emitWValue(dest_alloc);
const elem_val = switch (mask_elem.unwrap()) {
.a_elem => |idx| try cg.load(operand_a, elem_ty, @intCast(elem_size * idx)),
.b_elem => |idx| try cg.load(operand_b, elem_ty, @intCast(elem_size * idx)),
.undef => try cg.emitUndefined(elem_ty),
};
try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx));
}
return cg.finishAir(inst, dest_alloc, &.{ unwrapped.operand_a, unwrapped.operand_b });
}
fn airReduce(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {

View File

@ -2490,7 +2490,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
switch (air_tags[@intFromEnum(inst)]) {
// zig fmt: off
.select => try cg.airSelect(inst),
.shuffle => try cg.airShuffle(inst),
.shuffle_one, .shuffle_two => @panic("x86_64 TODO: shuffle_one/shuffle_two"),
// zig fmt: on
.arg => if (cg.debug_output != .none) {

View File

@ -3374,7 +3374,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
.error_name => try airErrorName(f, inst),
.splat => try airSplat(f, inst),
.select => try airSelect(f, inst),
.shuffle => try airShuffle(f, inst),
.shuffle_one => try airShuffleOne(f, inst),
.shuffle_two => try airShuffleTwo(f, inst),
.reduce => try airReduce(f, inst),
.aggregate_init => try airAggregateInit(f, inst),
.union_init => try airUnionInit(f, inst),
@ -7163,34 +7164,73 @@ fn airSelect(f: *Function, inst: Air.Inst.Index) !CValue {
return local;
}
fn airShuffle(f: *Function, inst: Air.Inst.Index) !CValue {
fn airShuffleOne(f: *Function, inst: Air.Inst.Index) !CValue {
const pt = f.object.dg.pt;
const zcu = pt.zcu;
const ty_pl = f.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = f.air.extraData(Air.Shuffle, ty_pl.payload).data;
const mask = Value.fromInterned(extra.mask);
const lhs = try f.resolveInst(extra.a);
const rhs = try f.resolveInst(extra.b);
const inst_ty = f.typeOfIndex(inst);
const unwrapped = f.air.unwrapShuffleOne(zcu, inst);
const mask = unwrapped.mask;
const operand = try f.resolveInst(unwrapped.operand);
const inst_ty = unwrapped.result_ty;
const writer = f.object.writer();
const local = try f.allocLocal(inst, inst_ty);
try reap(f, inst, &.{ extra.a, extra.b }); // local cannot alias operands
for (0..extra.mask_len) |index| {
try reap(f, inst, &.{unwrapped.operand}); // local cannot alias operand
for (mask, 0..) |mask_elem, out_idx| {
try f.writeCValue(writer, local, .Other);
try writer.writeByte('[');
try f.object.dg.renderValue(writer, try pt.intValue(.usize, index), .Other);
try f.object.dg.renderValue(writer, try pt.intValue(.usize, out_idx), .Other);
try writer.writeAll("] = ");
const mask_elem = (try mask.elemValue(pt, index)).toSignedInt(zcu);
const src_val = try pt.intValue(.usize, @as(u64, @intCast(mask_elem ^ mask_elem >> 63)));
try f.writeCValue(writer, if (mask_elem >= 0) lhs else rhs, .Other);
switch (mask_elem.unwrap()) {
.elem => |src_idx| {
try f.writeCValue(writer, operand, .Other);
try writer.writeByte('[');
try f.object.dg.renderValue(writer, src_val, .Other);
try writer.writeAll("];\n");
try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
try writer.writeByte(']');
},
.value => |val| try f.object.dg.renderValue(writer, .fromInterned(val), .Other),
}
try writer.writeAll(";\n");
}
return local;
}
/// Renders a `shuffle_two` instruction as C source.
///
/// The instruction is unwrapped with `unwrapShuffleTwo`, giving the result type, the two
/// vector operands and a mask with one entry per result element. A local of the result type
/// is obtained from `allocLocal`, and one assignment statement of the form
/// `local[out_idx] = <source>;` is written per mask entry. That local is returned as the
/// value of the instruction.
fn airShuffleTwo(f: *Function, inst: Air.Inst.Index) !CValue {
const pt = f.object.dg.pt;
const zcu = pt.zcu;
const unwrapped = f.air.unwrapShuffleTwo(zcu, inst);
const mask = unwrapped.mask;
const operand_a = try f.resolveInst(unwrapped.operand_a);
const operand_b = try f.resolveInst(unwrapped.operand_b);
const inst_ty = unwrapped.result_ty;
// Only needed to render the value used for `.undef` mask entries.
const elem_ty = inst_ty.childType(zcu);
const writer = f.object.writer();
const local = try f.allocLocal(inst, inst_ty);
try reap(f, inst, &.{ unwrapped.operand_a, unwrapped.operand_b }); // local cannot alias operands
for (mask, 0..) |mask_elem, out_idx| {
// Left-hand side: `local[out_idx] = `
try f.writeCValue(writer, local, .Other);
try writer.writeByte('[');
try f.object.dg.renderValue(writer, try pt.intValue(.usize, out_idx), .Other);
try writer.writeAll("] = ");
// Right-hand side: an element of one of the operands, or an undef value of the element type.
switch (mask_elem.unwrap()) {
.a_elem => |src_idx| {
// `operand_a[src_idx]`
try f.writeCValue(writer, operand_a, .Other);
try writer.writeByte('[');
try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
try writer.writeByte(']');
},
.b_elem => |src_idx| {
// `operand_b[src_idx]`
try f.writeCValue(writer, operand_b, .Other);
try writer.writeByte('[');
try f.object.dg.renderValue(writer, try pt.intValue(.usize, src_idx), .Other);
try writer.writeByte(']');
},
.undef => try f.object.dg.renderUndefValue(writer, elem_ty, .Other),
}
// Terminate the assignment statement.
try writer.writeAll(";\n");
}
return local;

View File

@ -4969,7 +4969,8 @@ pub const FuncGen = struct {
.error_name => try self.airErrorName(inst),
.splat => try self.airSplat(inst),
.select => try self.airSelect(inst),
.shuffle => try self.airShuffle(inst),
.shuffle_one => try self.airShuffleOne(inst),
.shuffle_two => try self.airShuffleTwo(inst),
.aggregate_init => try self.airAggregateInit(inst),
.union_init => try self.airUnionInit(inst),
.prefetch => try self.airPrefetch(inst),
@ -9666,7 +9667,7 @@ pub const FuncGen = struct {
const zcu = o.pt.zcu;
const ip = &zcu.intern_pool;
for (body_tail[1..]) |body_inst| {
switch (fg.liveness.categorizeOperand(fg.air, body_inst, body_tail[0], ip)) {
switch (fg.liveness.categorizeOperand(fg.air, zcu, body_inst, body_tail[0], ip)) {
.none => continue,
.write, .noret, .complex => return false,
.tomb => return true,
@ -10421,42 +10422,192 @@ pub const FuncGen = struct {
return self.wip.select(.normal, pred, a, b, "");
}
fn airShuffle(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
const o = self.ng.object;
fn airShuffleOne(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
const o = fg.ng.object;
const pt = o.pt;
const zcu = pt.zcu;
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
const a = try self.resolveInst(extra.a);
const b = try self.resolveInst(extra.b);
const mask = Value.fromInterned(extra.mask);
const mask_len = extra.mask_len;
const a_len = self.typeOf(extra.a).vectorLen(zcu);
const gpa = zcu.gpa;
// LLVM uses integers larger than the length of the first array to
// index into the second array. This was deemed unnecessarily fragile
// when changing code, so Zig uses negative numbers to index the
// second vector. These start at -1 and go down, and are easiest to use
// with the ~ operator. Here we convert between the two formats.
const values = try self.gpa.alloc(Builder.Constant, mask_len);
defer self.gpa.free(values);
const unwrapped = fg.air.unwrapShuffleOne(zcu, inst);
for (values, 0..) |*val, i| {
const elem = try mask.elemValue(pt, i);
if (elem.isUndef(zcu)) {
val.* = try o.builder.undefConst(.i32);
} else {
const int = elem.toSignedInt(zcu);
const unsigned: u32 = @intCast(if (int >= 0) int else ~int + a_len);
val.* = try o.builder.intConst(.i32, unsigned);
}
const operand = try fg.resolveInst(unwrapped.operand);
const mask = unwrapped.mask;
const operand_ty = fg.typeOf(unwrapped.operand);
const llvm_operand_ty = try o.lowerType(operand_ty);
const llvm_result_ty = try o.lowerType(unwrapped.result_ty);
const llvm_elem_ty = try o.lowerType(unwrapped.result_ty.childType(zcu));
const llvm_poison_elem = try o.builder.poisonConst(llvm_elem_ty);
const llvm_poison_mask_elem = try o.builder.poisonConst(.i32);
const llvm_mask_ty = try o.builder.vectorType(.normal, @intCast(mask.len), .i32);
// LLVM requires that the two input vectors have the same length, so lowering isn't trivial.
// And, in the words of jacobly0: "llvm sucks at shuffles so we do have to hold its hand at
// least a bit". So, there are two cases here.
//
// If the operand length equals the mask length, we do just the one `shufflevector`, where
// the second operand is a constant vector with comptime-known elements at the right indices
// and poison values elsewhere (in the indices which won't be selected).
//
// Otherwise, we lower to *two* `shufflevector` instructions. The first shuffles the runtime
// operand with an all-poison vector to extract and correctly position all of the runtime
// elements. We also make a constant vector with all of the comptime elements correctly
// positioned. Then, our second instruction selects elements from those "runtime-or-poison"
// and "comptime-or-poison" vectors to compute the result.
// This buffer is used primarily for the mask constants.
const llvm_elem_buf = try gpa.alloc(Builder.Constant, mask.len);
defer gpa.free(llvm_elem_buf);
// ...but first, we'll collect all of the comptime-known values.
var any_defined_comptime_value = false;
for (mask, llvm_elem_buf) |mask_elem, *llvm_elem| {
llvm_elem.* = switch (mask_elem.unwrap()) {
.elem => llvm_poison_elem,
.value => |val| if (!Value.fromInterned(val).isUndef(zcu)) elem: {
any_defined_comptime_value = true;
break :elem try o.lowerValue(val);
} else llvm_poison_elem,
};
}
// This vector is like the result, but runtime elements are replaced with poison.
const comptime_and_poison: Builder.Value = if (any_defined_comptime_value) vec: {
break :vec try o.builder.vectorValue(llvm_result_ty, llvm_elem_buf);
} else try o.builder.poisonValue(llvm_result_ty);
const llvm_mask_value = try o.builder.vectorValue(
try o.builder.vectorType(.normal, mask_len, .i32),
values,
if (operand_ty.vectorLen(zcu) == mask.len) {
// input length equals mask/output length, so we lower to one instruction
for (mask, llvm_elem_buf, 0..) |mask_elem, *llvm_elem, elem_idx| {
llvm_elem.* = switch (mask_elem.unwrap()) {
.elem => |idx| try o.builder.intConst(.i32, idx),
.value => |val| if (!Value.fromInterned(val).isUndef(zcu)) mask_val: {
break :mask_val try o.builder.intConst(.i32, mask.len + elem_idx);
} else llvm_poison_mask_elem,
};
}
return fg.wip.shuffleVector(
operand,
comptime_and_poison,
try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
"",
);
}
for (mask, llvm_elem_buf) |mask_elem, *llvm_elem| {
llvm_elem.* = switch (mask_elem.unwrap()) {
.elem => |idx| try o.builder.intConst(.i32, idx),
.value => llvm_poison_mask_elem,
};
}
// This vector is like our result, but all comptime-known elements are poison.
const runtime_and_poison = try fg.wip.shuffleVector(
operand,
try o.builder.poisonValue(llvm_operand_ty),
try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
"",
);
if (!any_defined_comptime_value) {
// `comptime_and_poison` is just poison; a second shuffle would be a nop.
return runtime_and_poison;
}
// In this second shuffle, the inputs, the mask, and the output all have the same length.
for (mask, llvm_elem_buf, 0..) |mask_elem, *llvm_elem, elem_idx| {
llvm_elem.* = switch (mask_elem.unwrap()) {
.elem => try o.builder.intConst(.i32, elem_idx),
.value => |val| if (!Value.fromInterned(val).isUndef(zcu)) mask_val: {
break :mask_val try o.builder.intConst(.i32, mask.len + elem_idx);
} else llvm_poison_mask_elem,
};
}
// Merge the runtime and comptime elements with the mask we just built.
return fg.wip.shuffleVector(
runtime_and_poison,
comptime_and_poison,
try o.builder.vectorValue(llvm_mask_ty, llvm_elem_buf),
"",
);
}
/// Lowers a `shuffle_two` instruction to one, two or three `shuffleVector` calls.
///
/// The two operands may have different lengths. Whichever operand is shorter is first
/// extended to the longer length (`operand_len`) by shuffling it with a poison vector.
/// The final shuffle then selects from the two equal-length operands using a mask of
/// `mask.len` `i32` elements, where `.undef` mask entries become poison mask elements.
fn airShuffleTwo(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
const o = fg.ng.object;
const pt = o.pt;
const zcu = pt.zcu;
const gpa = zcu.gpa;
const unwrapped = fg.air.unwrapShuffleTwo(zcu, inst);
const mask = unwrapped.mask;
const llvm_elem_ty = try o.lowerType(unwrapped.result_ty.childType(zcu));
const llvm_mask_ty = try o.builder.vectorType(.normal, @intCast(mask.len), .i32);
const llvm_poison_mask_elem = try o.builder.poisonConst(.i32);
// This is kind of simpler than in `airShuffleOne`. We extend the shorter vector to the
// length of the longer one with an initial `shufflevector` if necessary, and then do the
// actual computation with a second `shufflevector`.
const operand_a_len = fg.typeOf(unwrapped.operand_a).vectorLen(zcu);
const operand_b_len = fg.typeOf(unwrapped.operand_b).vectorLen(zcu);
const operand_len: u32 = @max(operand_a_len, operand_b_len);
// If we need to extend an operand, this is the type that mask will have.
const llvm_operand_mask_ty = try o.builder.vectorType(.normal, operand_len, .i32);
// One scratch buffer is reused for every mask built below, so it is sized for the largest
// of them: an extension mask has `operand_len` elements, the final mask has `mask.len`.
const llvm_elem_buf = try gpa.alloc(Builder.Constant, @max(mask.len, operand_len));
defer gpa.free(llvm_elem_buf);
const operand_a: Builder.Value = extend: {
const raw = try fg.resolveInst(unwrapped.operand_a);
// Already the longer (or equal-length) operand: use it as-is.
if (operand_a_len == operand_len) break :extend raw;
// Extend with a `shufflevector`, with a mask `<0, 1, ..., len - 1, poison, poison, ..., poison>`
// where `len` is `operand_a_len`.
const mask_elems = llvm_elem_buf[0..operand_len];
for (mask_elems[0..operand_a_len], 0..) |*llvm_elem, elem_idx| {
llvm_elem.* = try o.builder.intConst(.i32, elem_idx);
}
@memset(mask_elems[operand_a_len..], llvm_poison_mask_elem);
// The second shuffle input is a poison vector of this operand's own (shorter) type.
const llvm_this_operand_ty = try o.builder.vectorType(.normal, operand_a_len, llvm_elem_ty);
break :extend try fg.wip.shuffleVector(
raw,
try o.builder.poisonValue(llvm_this_operand_ty),
try o.builder.vectorValue(llvm_operand_mask_ty, mask_elems),
"",
);
};
const operand_b: Builder.Value = extend: {
const raw = try fg.resolveInst(unwrapped.operand_b);
// Already the longer (or equal-length) operand: use it as-is.
if (operand_b_len == operand_len) break :extend raw;
// Extend with a `shufflevector`, with a mask `<0, 1, ..., len - 1, poison, poison, ..., poison>`
// where `len` is `operand_b_len`.
const mask_elems = llvm_elem_buf[0..operand_len];
for (mask_elems[0..operand_b_len], 0..) |*llvm_elem, elem_idx| {
llvm_elem.* = try o.builder.intConst(.i32, elem_idx);
}
@memset(mask_elems[operand_b_len..], llvm_poison_mask_elem);
// The second shuffle input is a poison vector of this operand's own (shorter) type.
const llvm_this_operand_ty = try o.builder.vectorType(.normal, operand_b_len, llvm_elem_ty);
break :extend try fg.wip.shuffleVector(
raw,
try o.builder.poisonValue(llvm_this_operand_ty),
try o.builder.vectorValue(llvm_operand_mask_ty, mask_elems),
"",
);
};
// `operand_a` and `operand_b` now have the same length (we've extended the shorter one with
// an initial shuffle if necessary). Now for the easy bit.
const mask_elems = llvm_elem_buf[0..mask.len];
for (mask, mask_elems) |mask_elem, *llvm_mask_elem| {
llvm_mask_elem.* = switch (mask_elem.unwrap()) {
.a_elem => |idx| try o.builder.intConst(.i32, idx),
// Indices into the second operand are offset by the (common) operand length.
.b_elem => |idx| try o.builder.intConst(.i32, operand_len + idx),
.undef => llvm_poison_mask_elem,
};
}
return fg.wip.shuffleVector(
operand_a,
operand_b,
try o.builder.vectorValue(llvm_mask_ty, mask_elems),
"",
);
return self.wip.shuffleVector(a, b, llvm_mask_value, "");
}
/// Reduce a vector by repeatedly applying `llvm_fn` to produce an accumulated result.

View File

@ -3252,7 +3252,8 @@ const NavGen = struct {
.splat => try self.airSplat(inst),
.reduce, .reduce_optimized => try self.airReduce(inst),
.shuffle => try self.airShuffle(inst),
.shuffle_one => try self.airShuffleOne(inst),
.shuffle_two => try self.airShuffleTwo(inst),
.ptr_add => try self.airPtrAdd(inst),
.ptr_sub => try self.airPtrSub(inst),
@ -4047,40 +4048,57 @@ const NavGen = struct {
return result_id;
}
fn airShuffle(self: *NavGen, inst: Air.Inst.Index) !?IdRef {
const pt = self.pt;
fn airShuffleOne(ng: *NavGen, inst: Air.Inst.Index) !?IdRef {
const pt = ng.pt;
const zcu = pt.zcu;
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
const a = try self.resolve(extra.a);
const b = try self.resolve(extra.b);
const mask = Value.fromInterned(extra.mask);
const gpa = zcu.gpa;
// Note: number of components in the result, a, and b may differ.
const result_ty = self.typeOfIndex(inst);
const scalar_ty = result_ty.scalarType(zcu);
const scalar_ty_id = try self.resolveType(scalar_ty, .direct);
const unwrapped = ng.air.unwrapShuffleOne(zcu, inst);
const mask = unwrapped.mask;
const result_ty = unwrapped.result_ty;
const elem_ty = result_ty.childType(zcu);
const operand = try ng.resolve(unwrapped.operand);
const constituents = try self.gpa.alloc(IdRef, result_ty.vectorLen(zcu));
defer self.gpa.free(constituents);
const constituents = try gpa.alloc(IdRef, mask.len);
defer gpa.free(constituents);
for (constituents, 0..) |*id, i| {
const elem = try mask.elemValue(pt, i);
if (elem.isUndef(zcu)) {
id.* = try self.spv.constUndef(scalar_ty_id);
continue;
for (constituents, mask) |*id, mask_elem| {
id.* = switch (mask_elem.unwrap()) {
.elem => |idx| try ng.extractVectorComponent(elem_ty, operand, idx),
.value => |val| try ng.constant(elem_ty, .fromInterned(val), .direct),
};
}
const index = elem.toSignedInt(zcu);
if (index >= 0) {
id.* = try self.extractVectorComponent(scalar_ty, a, @intCast(index));
} else {
id.* = try self.extractVectorComponent(scalar_ty, b, @intCast(~index));
}
const result_ty_id = try ng.resolveType(result_ty, .direct);
return try ng.constructComposite(result_ty_id, constituents);
}
const result_ty_id = try self.resolveType(result_ty, .direct);
return try self.constructComposite(result_ty_id, constituents);
/// Lowers a `shuffle_two` instruction by building the result vector one component at a time.
///
/// Each mask entry selects either a component extracted from one of the two operands or an
/// undef constant of the element type; the collected components are then assembled into a
/// composite of the result type.
fn airShuffleTwo(ng: *NavGen, inst: Air.Inst.Index) !?IdRef {
    const zcu = ng.pt.zcu;
    const gpa = zcu.gpa;

    const shuffle = ng.air.unwrapShuffleTwo(zcu, inst);
    const scalar_ty = shuffle.result_ty.childType(zcu);
    const scalar_ty_id = try ng.resolveType(scalar_ty, .direct);

    const vec_a = try ng.resolve(shuffle.operand_a);
    const vec_b = try ng.resolve(shuffle.operand_b);

    // One id per result component, filled in mask order.
    const components = try gpa.alloc(IdRef, shuffle.mask.len);
    defer gpa.free(components);

    for (shuffle.mask, components) |selector, *component| {
        component.* = switch (selector.unwrap()) {
            .a_elem => |src_idx| try ng.extractVectorComponent(scalar_ty, vec_a, src_idx),
            .b_elem => |src_idx| try ng.extractVectorComponent(scalar_ty, vec_b, src_idx),
            .undef => try ng.spv.constUndef(scalar_ty_id),
        };
    }

    // The vector type is resolved only after every component has been produced.
    const vector_ty_id = try ng.resolveType(shuffle.result_ty, .direct);
    return try ng.constructComposite(vector_ty_id, components);
}
fn indicesToIds(self: *NavGen, indices: []const u32) ![]IdRef {

View File

@ -315,7 +315,8 @@ const Writer = struct {
.wasm_memory_grow => try w.writeWasmMemoryGrow(s, inst),
.mul_add => try w.writeMulAdd(s, inst),
.select => try w.writeSelect(s, inst),
.shuffle => try w.writeShuffle(s, inst),
.shuffle_one => try w.writeShuffleOne(s, inst),
.shuffle_two => try w.writeShuffleTwo(s, inst),
.reduce, .reduce_optimized => try w.writeReduce(s, inst),
.cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
.vector_store_elem => try w.writeVectorStoreElem(s, inst),
@ -499,14 +500,39 @@ const Writer = struct {
try w.writeOperand(s, inst, 2, pl_op.operand);
}
fn writeShuffle(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
const ty_pl = w.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = w.air.extraData(Air.Shuffle, ty_pl.payload).data;
try w.writeOperand(s, inst, 0, extra.a);
fn writeShuffleOne(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
const unwrapped = w.air.unwrapShuffleOne(w.pt.zcu, inst);
try w.writeType(s, unwrapped.result_ty);
try s.writeAll(", ");
try w.writeOperand(s, inst, 1, extra.b);
try s.print(", mask {d}, len {d}", .{ extra.mask, extra.mask_len });
try w.writeOperand(s, inst, 0, unwrapped.operand);
try s.writeAll(", [");
for (unwrapped.mask, 0..) |mask_elem, mask_idx| {
if (mask_idx > 0) try s.writeAll(", ");
switch (mask_elem.unwrap()) {
.elem => |idx| try s.print("elem {d}", .{idx}),
.value => |val| try s.print("val {}", .{Value.fromInterned(val).fmtValue(w.pt)}),
}
}
try s.writeByte(']');
}
/// Prints the operands of a `shuffle_two` instruction: the result type, both vector operands,
/// and then the mask as a bracketed, comma-separated list whose entries are written as
/// `a_elem N`, `b_elem N` or `undef`.
fn writeShuffleTwo(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {
    const shuffle = w.air.unwrapShuffleTwo(w.pt.zcu, inst);

    try w.writeType(s, shuffle.result_ty);
    try s.writeAll(", ");
    try w.writeOperand(s, inst, 0, shuffle.operand_a);
    try s.writeAll(", ");
    try w.writeOperand(s, inst, 1, shuffle.operand_b);
    try s.writeAll(", [");

    for (shuffle.mask, 0..) |selector, position| {
        // Separator goes before every entry except the first.
        if (position != 0) try s.writeAll(", ");
        switch (selector.unwrap()) {
            .undef => try s.writeAll("undef"),
            .a_elem => |src_idx| try s.print("a_elem {d}", .{src_idx}),
            .b_elem => |src_idx| try s.print("b_elem {d}", .{src_idx}),
        }
    }

    try s.writeByte(']');
}
fn writeSelect(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void {

View File

@ -1,14 +1,20 @@
export fn entry() void {
const v: @Vector(4, u32) = [4]u32{ 10, 11, 12, 13 };
const x: @Vector(4, u32) = [4]u32{ 14, 15, 16, 17 };
const z = @shuffle(u32, v, x, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
_ = z;
export fn foo() void {
// Here, the bad index ('7') is not less than 'b.len', so the error shouldn't have a note suggesting a negative index.
const a: @Vector(4, u32) = .{ 10, 11, 12, 13 };
const b: @Vector(4, u32) = .{ 14, 15, 16, 17 };
_ = @shuffle(u32, a, b, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
}
export fn bar() void {
// Here, the bad index ('7') *is* less than 'b.len', so the error *should* have a note suggesting a negative index.
const a: @Vector(4, u32) = .{ 10, 11, 12, 13 };
const b: @Vector(9, u32) = .{ 14, 15, 16, 17, 18, 19, 20, 21, 22 };
_ = @shuffle(u32, a, b, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
}
// error
// backend=stage2
// target=native
//
// :4:41: error: mask index '4' has out-of-bounds selection
// :4:29: note: selected index '7' out of bounds of '@Vector(4, u32)'
// :4:32: note: selections from the second vector are specified with negative numbers
// :5:35: error: mask element at index '4' selects out-of-bounds index
// :5:23: note: index '7' exceeds bounds of '@Vector(4, u32)' given here
// :11:35: error: mask element at index '4' selects out-of-bounds index
// :11:23: note: index '7' exceeds bounds of '@Vector(4, u32)' given here
// :11:26: note: use '~@as(u32, 7)' to index into second vector given here