mirror of
https://github.com/ziglang/zig.git
synced 2026-01-21 14:55:25 +00:00
Legalize: implement scalarization of @shuffle
This commit is contained in:
parent
add2976a9b
commit
ec579aa0f3
@ -1246,11 +1246,7 @@ pub const Cpu = struct {
|
||||
|
||||
/// Adds the specified feature set but not its dependencies.
|
||||
pub fn addFeatureSet(set: *Set, other_set: Set) void {
|
||||
if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) {
|
||||
for (&set.ints, other_set.ints) |*set_int, other_set_int| set_int.* |= other_set_int;
|
||||
} else {
|
||||
set.ints = @as(@Vector(usize_count, usize), set.ints) | @as(@Vector(usize_count, usize), other_set.ints);
|
||||
}
|
||||
set.ints = @as(@Vector(usize_count, usize), set.ints) | @as(@Vector(usize_count, usize), other_set.ints);
|
||||
}
|
||||
|
||||
/// Removes the specified feature but not its dependents.
|
||||
@ -1262,11 +1258,7 @@ pub const Cpu = struct {
|
||||
|
||||
/// Removes the specified feature but not its dependents.
|
||||
pub fn removeFeatureSet(set: *Set, other_set: Set) void {
|
||||
if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) {
|
||||
for (&set.ints, other_set.ints) |*set_int, other_set_int| set_int.* &= ~other_set_int;
|
||||
} else {
|
||||
set.ints = @as(@Vector(usize_count, usize), set.ints) & ~@as(@Vector(usize_count, usize), other_set.ints);
|
||||
}
|
||||
set.ints = @as(@Vector(usize_count, usize), set.ints) & ~@as(@Vector(usize_count, usize), other_set.ints);
|
||||
}
|
||||
|
||||
pub fn populateDependencies(set: *Set, all_features_list: []const Cpu.Feature) void {
|
||||
@ -1295,17 +1287,10 @@ pub const Cpu = struct {
|
||||
}
|
||||
|
||||
pub fn isSuperSetOf(set: Set, other_set: Set) bool {
|
||||
if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff) {
|
||||
var result = true;
|
||||
for (&set.ints, other_set.ints) |*set_int, other_set_int|
|
||||
result = result and (set_int.* & other_set_int) == other_set_int;
|
||||
return result;
|
||||
} else {
|
||||
const V = @Vector(usize_count, usize);
|
||||
const set_v: V = set.ints;
|
||||
const other_v: V = other_set.ints;
|
||||
return @reduce(.And, (set_v & other_v) == other_v);
|
||||
}
|
||||
const V = @Vector(usize_count, usize);
|
||||
const set_v: V = set.ints;
|
||||
const other_v: V = other_set.ints;
|
||||
return @reduce(.And, (set_v & other_v) == other_v);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -889,19 +889,10 @@ pub fn ArrayHashMapUnmanaged(
|
||||
self.pointer_stability.lock();
|
||||
defer self.pointer_stability.unlock();
|
||||
|
||||
if (new_capacity <= linear_scan_max) {
|
||||
try self.entries.ensureTotalCapacity(gpa, new_capacity);
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.index_header) |header| {
|
||||
if (new_capacity <= header.capacity()) {
|
||||
try self.entries.ensureTotalCapacity(gpa, new_capacity);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
try self.entries.ensureTotalCapacity(gpa, new_capacity);
|
||||
if (new_capacity <= linear_scan_max) return;
|
||||
if (self.index_header) |header| if (new_capacity <= header.capacity()) return;
|
||||
|
||||
const new_bit_index = try IndexHeader.findBitIndex(new_capacity);
|
||||
const new_header = try IndexHeader.alloc(gpa, new_bit_index);
|
||||
|
||||
@ -2116,7 +2107,7 @@ const IndexHeader = struct {
|
||||
|
||||
fn findBitIndex(desired_capacity: usize) Allocator.Error!u8 {
|
||||
if (desired_capacity > max_capacity) return error.OutOfMemory;
|
||||
var new_bit_index = @as(u8, @intCast(std.math.log2_int_ceil(usize, desired_capacity)));
|
||||
var new_bit_index: u8 = @intCast(std.math.log2_int_ceil(usize, desired_capacity));
|
||||
if (desired_capacity > index_capacities[new_bit_index]) new_bit_index += 1;
|
||||
if (new_bit_index < min_bit_index) new_bit_index = min_bit_index;
|
||||
assert(desired_capacity <= index_capacities[new_bit_index]);
|
||||
|
||||
@ -499,15 +499,12 @@ fn ChaChaNonVecImpl(comptime rounds_nb: usize) type {
|
||||
fn ChaChaImpl(comptime rounds_nb: usize) type {
|
||||
switch (builtin.cpu.arch) {
|
||||
.x86_64 => {
|
||||
const has_avx2 = std.Target.x86.featureSetHas(builtin.cpu.features, .avx2);
|
||||
const has_avx512f = std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f);
|
||||
if (builtin.zig_backend != .stage2_x86_64 and has_avx512f) return ChaChaVecImpl(rounds_nb, 4);
|
||||
if (has_avx2) return ChaChaVecImpl(rounds_nb, 2);
|
||||
if (builtin.zig_backend != .stage2_x86_64 and std.Target.x86.featureSetHas(builtin.cpu.features, .avx512f)) return ChaChaVecImpl(rounds_nb, 4);
|
||||
if (std.Target.x86.featureSetHas(builtin.cpu.features, .avx2)) return ChaChaVecImpl(rounds_nb, 2);
|
||||
return ChaChaVecImpl(rounds_nb, 1);
|
||||
},
|
||||
.aarch64 => {
|
||||
const has_neon = std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon);
|
||||
if (has_neon) return ChaChaVecImpl(rounds_nb, 4);
|
||||
if (builtin.zig_backend != .stage2_aarch64 and std.Target.aarch64.featureSetHas(builtin.cpu.features, .neon)) return ChaChaVecImpl(rounds_nb, 4);
|
||||
return ChaChaNonVecImpl(rounds_nb);
|
||||
},
|
||||
else => return ChaChaNonVecImpl(rounds_nb),
|
||||
|
||||
@ -780,7 +780,6 @@ fn testExpect(comptime H: type, seed: anytype, input: []const u8, expected: u64)
|
||||
}
|
||||
|
||||
test "xxhash3" {
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
|
||||
if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
|
||||
|
||||
const H = XxHash3;
|
||||
@ -814,7 +813,6 @@ test "xxhash3" {
|
||||
}
|
||||
|
||||
test "xxhash3 smhasher" {
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
|
||||
if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
|
||||
|
||||
const Test = struct {
|
||||
@ -828,7 +826,6 @@ test "xxhash3 smhasher" {
|
||||
}
|
||||
|
||||
test "xxhash3 iterative api" {
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
|
||||
if (builtin.cpu.arch.isMIPS64() and (builtin.abi == .gnuabin32 or builtin.abi == .muslabin32)) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/23807
|
||||
|
||||
const Test = struct {
|
||||
|
||||
@ -231,8 +231,6 @@ pub fn extract(
|
||||
}
|
||||
|
||||
test "vector patterns" {
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
|
||||
|
||||
const base = @Vector(4, u32){ 10, 20, 30, 40 };
|
||||
const other_base = @Vector(4, u32){ 55, 66, 77, 88 };
|
||||
|
||||
@ -302,8 +300,6 @@ pub fn reverseOrder(vec: anytype) @TypeOf(vec) {
|
||||
}
|
||||
|
||||
test "vector shifting" {
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
|
||||
|
||||
const base = @Vector(4, u32){ 10, 20, 30, 40 };
|
||||
|
||||
try std.testing.expectEqual([4]u32{ 30, 40, 999, 999 }, shiftElementsLeft(base, 2, 999));
|
||||
|
||||
@ -704,7 +704,7 @@ pub const Inst = struct {
|
||||
/// Uses the `ty_pl` field, where the payload index points to:
|
||||
/// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty`
|
||||
/// 2. operand: Ref // guaranteed not to be an interned value
|
||||
/// See `unwrapShufleOne`.
|
||||
/// See `unwrapShuffleOne`.
|
||||
shuffle_one,
|
||||
/// Constructs a vector by selecting elements from two vectors based on a mask. Each mask
|
||||
/// element is either an index into one of the vectors, or "undef".
|
||||
@ -712,7 +712,7 @@ pub const Inst = struct {
|
||||
/// 1. mask_elem: ShuffleOneMask // for each `mask_len`, which comes from `ty_pl.ty`
|
||||
/// 2. operand_a: Ref // guaranteed not to be an interned value
|
||||
/// 3. operand_b: Ref // guaranteed not to be an interned value
|
||||
/// See `unwrapShufleTwo`.
|
||||
/// See `unwrapShuffleTwo`.
|
||||
shuffle_two,
|
||||
/// Constructs a vector element-wise from `a` or `b` based on `pred`.
|
||||
/// Uses the `pl_op` field with `pred` as operand, and payload `Bin`.
|
||||
|
||||
@ -74,6 +74,8 @@ pub const Feature = enum {
|
||||
scalarize_int_from_float,
|
||||
scalarize_int_from_float_optimized,
|
||||
scalarize_float_from_int,
|
||||
scalarize_shuffle_one,
|
||||
scalarize_shuffle_two,
|
||||
scalarize_select,
|
||||
scalarize_mul_add,
|
||||
|
||||
@ -168,7 +170,9 @@ pub const Feature = enum {
|
||||
.int_from_float => .scalarize_int_from_float,
|
||||
.int_from_float_optimized => .scalarize_int_from_float_optimized,
|
||||
.float_from_int => .scalarize_float_from_int,
|
||||
.select => .scalarize_select,
|
||||
.shuffle_one => .scalarize_shuffle_one,
|
||||
.shuffle_two => .scalarize_shuffle_two,
|
||||
.select => .scalarize_select, // must name the `scalarize_select` field declared in `Feature`
|
||||
.mul_add => .scalarize_mul_add,
|
||||
};
|
||||
}
|
||||
@ -521,11 +525,10 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
}
|
||||
},
|
||||
.splat,
|
||||
.shuffle_one,
|
||||
.shuffle_two,
|
||||
=> {},
|
||||
.select,
|
||||
=> if (l.features.contains(.scalarize_select)) continue :inst try l.scalarize(inst, .select_pl_op_bin),
|
||||
.shuffle_one => if (l.features.contains(.scalarize_shuffle_one)) continue :inst try l.scalarize(inst, .shuffle_one),
|
||||
.shuffle_two => if (l.features.contains(.scalarize_shuffle_two)) continue :inst try l.scalarize(inst, .shuffle_two),
|
||||
.select => if (l.features.contains(.scalarize_select)) continue :inst try l.scalarize(inst, .select),
|
||||
.memset,
|
||||
.memset_safe,
|
||||
.memcpy,
|
||||
@ -573,25 +576,26 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
}
|
||||
}
|
||||
|
||||
const ScalarizeDataTag = enum { un_op, ty_op, bin_op, ty_pl_vector_cmp, pl_op_bin, select_pl_op_bin };
|
||||
inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_tag: ScalarizeDataTag) Error!Air.Inst.Tag {
|
||||
return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, data_tag));
|
||||
const ScalarizeForm = enum { un_op, ty_op, bin_op, ty_pl_vector_cmp, pl_op_bin, shuffle_one, shuffle_two, select };
|
||||
inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Tag {
|
||||
return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, form));
|
||||
}
|
||||
fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_tag: ScalarizeDataTag) Error!Air.Inst.Data {
|
||||
fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const orig = l.air_instructions.get(@intFromEnum(orig_inst));
|
||||
const res_ty = l.typeOfIndex(orig_inst);
|
||||
const res_len = res_ty.vectorLen(zcu);
|
||||
|
||||
var inst_buf: [
|
||||
5 + switch (data_tag) {
|
||||
.un_op, .ty_op => 1,
|
||||
.bin_op, .ty_pl_vector_cmp => 2,
|
||||
.pl_op_bin => 3,
|
||||
.select_pl_op_bin => 6,
|
||||
} + 9
|
||||
]Air.Inst.Index = undefined;
|
||||
const extra_insts = switch (form) {
|
||||
.un_op, .ty_op => 1,
|
||||
.bin_op, .ty_pl_vector_cmp => 2,
|
||||
.pl_op_bin => 3,
|
||||
.shuffle_one, .shuffle_two => 13,
|
||||
.select => 6,
|
||||
};
|
||||
var inst_buf: [5 + extra_insts + 9]Air.Inst.Index = undefined;
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
@ -628,7 +632,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
.vector_ptr = res_alloc_inst.toRef(),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = res_elem: switch (data_tag) {
|
||||
.rhs = res_elem: switch (form) {
|
||||
.un_op => loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .un_op = loop.block.add(l, .{
|
||||
@ -638,7 +642,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef() },
|
||||
}),
|
||||
}).toRef(),
|
||||
.ty_op => loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .ty_op = .{
|
||||
@ -651,7 +655,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}),
|
||||
}).toRef(),
|
||||
.bin_op => loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .bin_op = .{
|
||||
@ -670,10 +674,10 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}),
|
||||
}).toRef(),
|
||||
.ty_pl_vector_cmp => {
|
||||
const extra = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data;
|
||||
break :res_elem try loop.block.addCmp(
|
||||
break :res_elem (try loop.block.addCmp(
|
||||
l,
|
||||
extra.compareOperator(),
|
||||
loop.block.add(l, .{
|
||||
@ -695,7 +699,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
.cmp_vector => false,
|
||||
.cmp_vector_optimized => true,
|
||||
} },
|
||||
);
|
||||
)).toRef();
|
||||
},
|
||||
.pl_op_bin => {
|
||||
const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
@ -726,12 +730,223 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
}).toRef();
|
||||
},
|
||||
.select_pl_op_bin => {
|
||||
.shuffle_one, .shuffle_two => {
|
||||
const ip = &zcu.intern_pool;
|
||||
const unwrapped = switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => l.getTmpAir().unwrapShuffleOne(zcu, orig_inst),
|
||||
.shuffle_two => l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst),
|
||||
};
|
||||
const operand_a = switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => unwrapped.operand,
|
||||
.shuffle_two => unwrapped.operand_a,
|
||||
};
|
||||
const operand_a_len = l.typeOf(operand_a).vectorLen(zcu);
|
||||
const elem_ty = unwrapped.result_ty.scalarType(zcu);
|
||||
var res_elem: Result = .init(l, elem_ty, &loop.block);
|
||||
res_elem.block = .init(loop.block.stealCapacity(extra_insts));
|
||||
{
|
||||
const ExpectedContents = extern struct {
|
||||
mask_elems: [128]InternPool.Index,
|
||||
ct_elems: switch (form) {
|
||||
else => unreachable,
|
||||
.shuffle_one => extern struct {
|
||||
keys: [152]InternPool.Index,
|
||||
header: u8 align(@alignOf(u32)),
|
||||
index: [256][2]u8,
|
||||
},
|
||||
.shuffle_two => void,
|
||||
},
|
||||
};
|
||||
var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
|
||||
std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa);
|
||||
const gpa = stack.get();
|
||||
|
||||
const mask_elems = try gpa.alloc(InternPool.Index, res_len);
|
||||
defer gpa.free(mask_elems);
|
||||
|
||||
var ct_elems: switch (form) {
|
||||
else => unreachable,
|
||||
.shuffle_one => std.AutoArrayHashMapUnmanaged(InternPool.Index, void),
|
||||
.shuffle_two => struct {
|
||||
const empty: @This() = .{};
|
||||
inline fn deinit(_: @This(), _: std.mem.Allocator) void {}
|
||||
inline fn ensureTotalCapacity(_: @This(), _: std.mem.Allocator, _: usize) error{}!void {}
|
||||
},
|
||||
} = .empty;
|
||||
defer ct_elems.deinit(gpa);
|
||||
try ct_elems.ensureTotalCapacity(gpa, res_len);
|
||||
|
||||
const mask_elem_ty = try pt.intType(.signed, 1 + Type.smallestUnsignedBits(@max(operand_a_len, switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => res_len,
|
||||
.shuffle_two => l.typeOf(unwrapped.operand_b).vectorLen(zcu),
|
||||
})));
|
||||
for (mask_elems, unwrapped.mask) |*mask_elem_val, mask_elem| mask_elem_val.* = (try pt.intValue(mask_elem_ty, switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => switch (mask_elem.unwrap()) {
|
||||
.elem => |index| index,
|
||||
.value => |elem_val| if (ip.isUndef(elem_val))
|
||||
operand_a_len
|
||||
else
|
||||
~@as(i33, @intCast((ct_elems.getOrPutAssumeCapacity(elem_val)).index)),
|
||||
},
|
||||
.shuffle_two => switch (mask_elem.unwrap()) {
|
||||
.a_elem => |a_index| a_index,
|
||||
.b_elem => |b_index| ~@as(i33, b_index),
|
||||
.undef => operand_a_len,
|
||||
},
|
||||
})).toIntern();
|
||||
const mask_ty = try pt.arrayType(.{
|
||||
.len = res_len,
|
||||
.child = mask_elem_ty.toIntern(),
|
||||
});
|
||||
const mask_elem_inst = res_elem.block.add(l, .{
|
||||
.tag = .ptr_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = Air.internedToRef(try pt.intern(.{ .ptr = .{
|
||||
.ty = (try pt.manyConstPtrType(mask_elem_ty)).toIntern(),
|
||||
.base_addr = .{ .uav = .{
|
||||
.val = try pt.intern(.{ .aggregate = .{
|
||||
.ty = mask_ty.toIntern(),
|
||||
.storage = .{ .elems = mask_elems },
|
||||
} }),
|
||||
.orig_ty = (try pt.singleConstPtrType(mask_ty)).toIntern(),
|
||||
} },
|
||||
.byte_offset = 0,
|
||||
} })),
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
var def_cond_br: CondBr = .init(l, (try res_elem.block.addCmp(
|
||||
l,
|
||||
.lt,
|
||||
mask_elem_inst.toRef(),
|
||||
try pt.intRef(mask_elem_ty, operand_a_len),
|
||||
.{},
|
||||
)).toRef(), &res_elem.block, .{});
|
||||
def_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity());
|
||||
{
|
||||
const operand_b_used = switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => ct_elems.count() > 0,
|
||||
.shuffle_two => true,
|
||||
};
|
||||
var operand_cond_br: CondBr = undefined;
|
||||
operand_cond_br.then_block = if (operand_b_used) then_block: {
|
||||
operand_cond_br = .init(l, (try def_cond_br.then_block.addCmp(
|
||||
l,
|
||||
.gte,
|
||||
mask_elem_inst.toRef(),
|
||||
try pt.intRef(mask_elem_ty, 0),
|
||||
.{},
|
||||
)).toRef(), &def_cond_br.then_block, .{});
|
||||
break :then_block .init(def_cond_br.then_block.stealRemainingCapacity());
|
||||
} else def_cond_br.then_block;
|
||||
_ = operand_cond_br.then_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = operand_cond_br.then_block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = operand_a,
|
||||
.rhs = operand_cond_br.then_block.add(l, .{
|
||||
.tag = .intcast,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = mask_elem_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
if (operand_b_used) {
|
||||
operand_cond_br.else_block = .init(operand_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = operand_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = if (switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => ct_elems.count() > 1,
|
||||
.shuffle_two => true,
|
||||
}) operand_cond_br.else_block.add(l, .{
|
||||
.tag = switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => .ptr_elem_val,
|
||||
.shuffle_two => .array_elem_val,
|
||||
},
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = operand_b: switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => {
|
||||
const ct_elems_ty = try pt.arrayType(.{
|
||||
.len = ct_elems.count(),
|
||||
.child = elem_ty.toIntern(),
|
||||
});
|
||||
break :operand_b Air.internedToRef(try pt.intern(.{ .ptr = .{
|
||||
.ty = (try pt.manyConstPtrType(elem_ty)).toIntern(),
|
||||
.base_addr = .{ .uav = .{
|
||||
.val = try pt.intern(.{ .aggregate = .{
|
||||
.ty = ct_elems_ty.toIntern(),
|
||||
.storage = .{ .elems = ct_elems.keys() },
|
||||
} }),
|
||||
.orig_ty = (try pt.singleConstPtrType(ct_elems_ty)).toIntern(),
|
||||
} },
|
||||
.byte_offset = 0,
|
||||
} }));
|
||||
},
|
||||
.shuffle_two => unwrapped.operand_b,
|
||||
},
|
||||
.rhs = operand_cond_br.else_block.add(l, .{
|
||||
.tag = .intcast,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = operand_cond_br.else_block.add(l, .{
|
||||
.tag = .not,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(mask_elem_ty.toIntern()),
|
||||
.operand = mask_elem_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef() else res_elem_br: {
|
||||
_ = operand_cond_br.else_block.stealCapacity(3);
|
||||
break :res_elem_br Air.internedToRef(ct_elems.keys()[0]);
|
||||
},
|
||||
} },
|
||||
});
|
||||
def_cond_br.else_block = .init(operand_cond_br.else_block.stealRemainingCapacity());
|
||||
try operand_cond_br.finish(l);
|
||||
} else {
|
||||
def_cond_br.then_block = operand_cond_br.then_block;
|
||||
_ = def_cond_br.then_block.stealCapacity(6);
|
||||
def_cond_br.else_block = .init(def_cond_br.then_block.stealRemainingCapacity());
|
||||
}
|
||||
}
|
||||
_ = def_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = try pt.undefRef(elem_ty),
|
||||
} },
|
||||
});
|
||||
try def_cond_br.finish(l);
|
||||
}
|
||||
try res_elem.finish(l);
|
||||
break :res_elem res_elem.inst.toRef();
|
||||
},
|
||||
.select => {
|
||||
const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
var res_elem: Result = .init(l, l.typeOf(extra.lhs).scalarType(zcu), &loop.block);
|
||||
res_elem.block = .init(loop.block.stealCapacity(6));
|
||||
res_elem.block = .init(loop.block.stealCapacity(extra_insts));
|
||||
{
|
||||
var select_cond_br: CondBr = .init(l, res_elem.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
@ -741,43 +956,39 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
} },
|
||||
}).toRef(), &res_elem.block, .{});
|
||||
select_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity());
|
||||
{
|
||||
_ = select_cond_br.then_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = select_cond_br.then_block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.lhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
}
|
||||
_ = select_cond_br.then_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = select_cond_br.then_block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.lhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
select_cond_br.else_block = .init(select_cond_br.then_block.stealRemainingCapacity());
|
||||
{
|
||||
_ = select_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = select_cond_br.else_block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.rhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
}
|
||||
_ = select_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = select_cond_br.else_block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.rhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
try select_cond_br.finish(l);
|
||||
}
|
||||
try res_elem.finish(l);
|
||||
break :res_elem res_elem.inst;
|
||||
break :res_elem res_elem.inst.toRef();
|
||||
},
|
||||
}.toRef(),
|
||||
},
|
||||
}),
|
||||
} },
|
||||
});
|
||||
@ -786,7 +997,7 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
l,
|
||||
.lt,
|
||||
cur_index_inst.toRef(),
|
||||
try pt.intRef(.usize, res_ty.vectorLen(zcu) - 1),
|
||||
try pt.intRef(.usize, res_len - 1),
|
||||
.{},
|
||||
)).toRef(), &loop.block, .{});
|
||||
loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
@ -810,21 +1021,19 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime data_
|
||||
});
|
||||
}
|
||||
loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
|
||||
{
|
||||
_ = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
}
|
||||
_ = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
try loop_cond_br.finish(l);
|
||||
}
|
||||
try loop.finish(l);
|
||||
@ -1337,6 +1546,7 @@ inline fn replaceInst(l: *Legalize, inst: Air.Inst.Index, tag: Air.Inst.Tag, dat
|
||||
const Air = @import("../Air.zig");
|
||||
const assert = std.debug.assert;
|
||||
const dev = @import("../dev.zig");
|
||||
const InternPool = @import("../InternPool.zig");
|
||||
const Legalize = @This();
|
||||
const std = @import("std");
|
||||
const Type = @import("../Type.zig");
|
||||
|
||||
@ -5195,6 +5195,8 @@ fn airShuffleOne(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
|
||||
|
||||
// TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible.
|
||||
// I tried to fix it, but I couldn't make much sense of how this backend handles memory.
|
||||
if (!isByRef(result_ty, zcu, cg.target) or
|
||||
!isByRef(cg.typeOf(unwrapped.operand), zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{});
|
||||
|
||||
const dest_alloc = try cg.allocStack(result_ty);
|
||||
for (mask, 0..) |mask_elem, out_idx| {
|
||||
@ -5232,7 +5234,7 @@ fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
|
||||
elem_ty.bitSize(zcu) % 8 == 0)
|
||||
{
|
||||
var lane_map: [16]u8 align(4) = undefined;
|
||||
const lanes_per_elem = elem_ty.bitSize(zcu) / 8;
|
||||
const lanes_per_elem: usize = @intCast(elem_ty.bitSize(zcu) / 8);
|
||||
for (mask, 0..) |mask_elem, out_idx| {
|
||||
const out_first_lane = out_idx * lanes_per_elem;
|
||||
const in_first_lane = switch (mask_elem.unwrap()) {
|
||||
@ -5260,6 +5262,9 @@ fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
|
||||
|
||||
// TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible.
|
||||
// I tried to fix it, but I couldn't make much sense of how this backend handles memory.
|
||||
if (!isByRef(result_ty, zcu, cg.target) or
|
||||
!isByRef(a_ty, zcu, cg.target) or
|
||||
!isByRef(b_ty, zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{});
|
||||
|
||||
const dest_alloc = try cg.allocStack(result_ty);
|
||||
for (mask, 0..) |mask_elem, out_idx| {
|
||||
|
||||
@ -53,11 +53,14 @@ pub fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features
|
||||
.scalarize_div_exact_optimized = use_old,
|
||||
.scalarize_max = use_old,
|
||||
.scalarize_min = use_old,
|
||||
.scalarize_bit_and = use_old,
|
||||
.scalarize_bit_or = use_old,
|
||||
.scalarize_shr = true,
|
||||
.scalarize_shr_exact = true,
|
||||
.scalarize_shl = true,
|
||||
.scalarize_shl_exact = true,
|
||||
.scalarize_shl_sat = true,
|
||||
.scalarize_xor = use_old,
|
||||
.scalarize_not = use_old,
|
||||
.scalarize_clz = use_old,
|
||||
.scalarize_ctz = true,
|
||||
@ -84,6 +87,8 @@ pub fn legalizeFeatures(target: *const std.Target) *const Air.Legalize.Features
|
||||
.scalarize_int_from_float = use_old,
|
||||
.scalarize_int_from_float_optimized = use_old,
|
||||
.scalarize_float_from_int = use_old,
|
||||
.scalarize_shuffle_one = true,
|
||||
.scalarize_shuffle_two = true,
|
||||
.scalarize_select = true,
|
||||
.scalarize_mul_add = use_old,
|
||||
|
||||
@ -2299,11 +2304,17 @@ fn gen(self: *CodeGen) InnerError!void {
|
||||
try self.genBody(self.air.getMainBody());
|
||||
|
||||
const epilogue = if (self.epilogue_relocs.items.len > 0) epilogue: {
|
||||
const epilogue_relocs_last_index = self.epilogue_relocs.items.len - 1;
|
||||
for (if (self.epilogue_relocs.items[epilogue_relocs_last_index] == self.mir_instructions.len - 1) epilogue_relocs: {
|
||||
_ = self.mir_instructions.pop();
|
||||
break :epilogue_relocs self.epilogue_relocs.items[0..epilogue_relocs_last_index];
|
||||
} else self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc);
|
||||
var last_inst: Mir.Inst.Index = @intCast(self.mir_instructions.len - 1);
|
||||
while (self.epilogue_relocs.getLastOrNull() == last_inst) {
|
||||
self.epilogue_relocs.items.len -= 1;
|
||||
self.mir_instructions.set(last_inst, .{
|
||||
.tag = .pseudo,
|
||||
.ops = .pseudo_dead_none,
|
||||
.data = undefined,
|
||||
});
|
||||
last_inst -= 1;
|
||||
}
|
||||
for (self.epilogue_relocs.items) |epilogue_reloc| self.performReloc(epilogue_reloc);
|
||||
|
||||
if (self.debug_output != .none) try self.asmPseudo(.pseudo_dbg_epilogue_begin_none);
|
||||
const backpatch_stack_dealloc = try self.asmPlaceholder();
|
||||
@ -174143,17 +174154,23 @@ fn lowerBlock(self: *CodeGen, inst: Air.Inst.Index, body: []const Air.Inst.Index
|
||||
var block_data = self.blocks.fetchRemove(inst).?;
|
||||
defer block_data.value.deinit(self.gpa);
|
||||
if (block_data.value.relocs.items.len > 0) {
|
||||
var last_inst: Mir.Inst.Index = @intCast(self.mir_instructions.len - 1);
|
||||
while (block_data.value.relocs.getLastOrNull() == last_inst) {
|
||||
block_data.value.relocs.items.len -= 1;
|
||||
self.mir_instructions.set(last_inst, .{
|
||||
.tag = .pseudo,
|
||||
.ops = .pseudo_dead_none,
|
||||
.data = undefined,
|
||||
});
|
||||
last_inst -= 1;
|
||||
}
|
||||
for (block_data.value.relocs.items) |block_reloc| self.performReloc(block_reloc);
|
||||
try self.restoreState(block_data.value.state, liveness.deaths, .{
|
||||
.emit_instructions = false,
|
||||
.update_tracking = true,
|
||||
.resurrect = true,
|
||||
.close_scope = true,
|
||||
});
|
||||
const block_relocs_last_index = block_data.value.relocs.items.len - 1;
|
||||
for (if (block_data.value.relocs.items[block_relocs_last_index] == self.mir_instructions.len - 1) block_relocs: {
|
||||
_ = self.mir_instructions.pop();
|
||||
break :block_relocs block_data.value.relocs.items[0..block_relocs_last_index];
|
||||
} else block_data.value.relocs.items) |block_reloc| self.performReloc(block_reloc);
|
||||
}
|
||||
|
||||
if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == inst_tracking_i);
|
||||
|
||||
@ -10,8 +10,6 @@ test "@shuffle int" {
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
|
||||
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
|
||||
if (builtin.zig_backend == .stage2_x86_64 and
|
||||
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)) return error.SkipZigTest;
|
||||
|
||||
const S = struct {
|
||||
fn doTheTest() !void {
|
||||
@ -53,7 +51,6 @@ test "@shuffle int" {
|
||||
|
||||
test "@shuffle int strange sizes" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
@ -136,7 +133,6 @@ fn testShuffle(
|
||||
|
||||
test "@shuffle bool 1" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
@ -160,7 +156,6 @@ test "@shuffle bool 1" {
|
||||
|
||||
test "@shuffle bool 2" {
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
|
||||
@ -906,8 +906,6 @@ test "mask parameter of @shuffle is comptime scope" {
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
|
||||
if (builtin.zig_backend == .stage2_x86_64 and
|
||||
!comptime std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3)) return error.SkipZigTest;
|
||||
|
||||
const __v4hi = @Vector(4, i16);
|
||||
var v4_a = __v4hi{ 1, 2, 3, 4 };
|
||||
@ -1357,7 +1355,6 @@ test "array operands to shuffle are coerced to vectors" {
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
|
||||
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
|
||||
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
|
||||
|
||||
const mask = [5]i32{ -1, 0, 1, 2, 3 };
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user