Legalize: implement scalarization of unary operations

Jacob Young 2025-05-29 06:11:28 -04:00
parent c907866d55
commit c1e9ef9eaa
8 changed files with 642 additions and 154 deletions


@@ -1,21 +1,48 @@
zcu: *const Zcu,
air: Air,
features: std.enums.EnumSet(Feature),
pt: Zcu.PerThread,
air_instructions: std.MultiArrayList(Air.Inst),
air_extra: std.ArrayListUnmanaged(u32),
features: *const Features,
pub const Feature = enum {
scalarize_not,
scalarize_clz,
scalarize_ctz,
scalarize_popcount,
scalarize_byte_swap,
scalarize_bit_reverse,
scalarize_sqrt,
scalarize_sin,
scalarize_cos,
scalarize_tan,
scalarize_exp,
scalarize_exp2,
scalarize_log,
scalarize_log2,
scalarize_log10,
scalarize_abs,
scalarize_floor,
scalarize_ceil,
scalarize_round,
scalarize_trunc_float,
scalarize_neg,
scalarize_neg_optimized,
/// Legalize (shift lhs, (splat rhs)) -> (shift lhs, rhs)
remove_shift_vector_rhs_splat,
/// Legalize reduce of a one element vector to a bitcast
reduce_one_elem_to_bitcast,
};
pub const Features = std.enums.EnumFieldStruct(Feature, bool, false);
pub const Features = std.enums.EnumSet(Feature);
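For orientation: a backend opts into individual legalizations by exporting a `legalize_features` set, which `legalize` below picks up via `@hasDecl`. A minimal sketch of such a declaration (the particular feature choices here are illustrative only, not taken from any one backend):

// Hypothetical backend module: request a couple of legalizations.
pub const legalize_features: Air.Legalize.Features = .init(.{
    .scalarize_ctz = true, // lower vector @ctz to an element-wise loop
    .remove_shift_vector_rhs_splat = true, // (shift lhs, splat(rhs)) -> (shift lhs, rhs)
});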
pub fn legalize(air: *Air, backend: std.builtin.CompilerBackend, zcu: *const Zcu) std.mem.Allocator.Error!void {
pub const Error = std.mem.Allocator.Error;
pub fn legalize(air: *Air, backend: std.builtin.CompilerBackend, pt: Zcu.PerThread) Error!void {
var l: Legalize = .{
.zcu = zcu,
.air = air.*,
.features = features: switch (backend) {
.pt = pt,
.air_instructions = air.instructions.toMultiArrayList(),
.air_extra = air.extra,
.features = &features: switch (backend) {
.other, .stage1 => unreachable,
inline .stage2_llvm,
.stage2_c,
@@ -30,34 +57,85 @@ pub fn legalize(air: *Air, backend: std.builtin.CompilerBackend, zcu: *const Zcu
.stage2_powerpc,
=> |ct_backend| {
const Backend = codegen.importBackend(ct_backend) orelse break :features .initEmpty();
break :features if (@hasDecl(Backend, "legalize_features"))
.init(Backend.legalize_features)
else
.initEmpty();
break :features if (@hasDecl(Backend, "legalize_features")) Backend.legalize_features else .initEmpty();
},
_ => unreachable,
},
};
defer air.* = l.air;
if (!l.features.bits.eql(.initEmpty())) try l.legalizeBody(l.air.getMainBody());
if (l.features.bits.eql(.initEmpty())) return;
defer air.* = l.getTmpAir();
const main_extra = l.extraData(Air.Block, l.air_extra.items[@intFromEnum(Air.ExtraIndex.main_block)]);
try l.legalizeBody(main_extra.end, main_extra.data.body_len);
}
fn legalizeBody(l: *Legalize, body: []const Air.Inst.Index) std.mem.Allocator.Error!void {
const zcu = l.zcu;
fn getTmpAir(l: *const Legalize) Air {
return .{
.instructions = l.air_instructions.slice(),
.extra = l.air_extra,
};
}
fn typeOf(l: *const Legalize, ref: Air.Inst.Ref) Type {
return l.getTmpAir().typeOf(ref, &l.pt.zcu.intern_pool);
}
fn typeOfIndex(l: *const Legalize, inst: Air.Inst.Index) Type {
return l.getTmpAir().typeOfIndex(inst, &l.pt.zcu.intern_pool);
}
fn extraData(l: *const Legalize, comptime T: type, index: usize) @TypeOf(Air.extraData(undefined, T, undefined)) {
return l.getTmpAir().extraData(T, index);
}
fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
const zcu = l.pt.zcu;
const ip = &zcu.intern_pool;
const tags = l.air.instructions.items(.tag);
const data = l.air.instructions.items(.data);
for (body) |inst| inst: switch (tags[@intFromEnum(inst)]) {
for (body_start..body_start + body_len) |inst_extra_index| {
const inst: Air.Inst.Index = @enumFromInt(l.air_extra.items[inst_extra_index]);
inst: switch (l.air_instructions.items(.tag)[@intFromEnum(inst)]) {
else => {},
inline .not,
.clz,
.ctz,
.popcount,
.byte_swap,
.bit_reverse,
.abs,
=> |air_tag| if (l.features.contains(@field(Feature, "scalarize_" ++ @tagName(air_tag)))) done: {
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
if (!ty_op.ty.toType().isVector(zcu)) break :done;
continue :inst try l.scalarizeUnary(inst, .ty_op, ty_op.operand);
},
inline .sqrt,
.sin,
.cos,
.tan,
.exp,
.exp2,
.log,
.log2,
.log10,
.floor,
.ceil,
.round,
.trunc_float,
.neg,
.neg_optimized,
=> |air_tag| if (l.features.contains(@field(Feature, "scalarize_" ++ @tagName(air_tag)))) done: {
const un_op = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op;
if (!l.typeOf(un_op).isVector(zcu)) break :done;
continue :inst try l.scalarizeUnary(inst, .un_op, un_op);
},
.shl,
.shl_exact,
.shl_sat,
.shr,
.shr_exact,
=> |air_tag| if (l.features.contains(.remove_shift_vector_rhs_splat)) done: {
const bin_op = data[@intFromEnum(inst)].bin_op;
const ty = l.air.typeOf(bin_op.rhs, ip);
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
const ty = l.typeOf(bin_op.rhs);
if (!ty.isVector(zcu)) break :done;
if (bin_op.rhs.toInterned()) |rhs_ip_index| switch (ip.indexToKey(rhs_ip_index)) {
else => {},
@@ -70,11 +148,11 @@ fn legalizeBody(l: *Legalize, body: []const Air.Inst.Index) std.mem.Allocator.Er
},
} else {
const rhs_inst = bin_op.rhs.toIndex().?;
switch (tags[@intFromEnum(rhs_inst)]) {
switch (l.air_instructions.items(.tag)[@intFromEnum(rhs_inst)]) {
else => {},
.splat => continue :inst l.replaceInst(inst, air_tag, .{ .bin_op = .{
.lhs = bin_op.lhs,
.rhs = data[@intFromEnum(rhs_inst)].ty_op.operand,
.rhs = l.air_instructions.items(.data)[@intFromEnum(rhs_inst)].ty_op.operand,
} }),
}
}
@@ -83,8 +161,8 @@ fn legalizeBody(l: *Legalize, body: []const Air.Inst.Index) std.mem.Allocator.Er
.reduce,
.reduce_optimized,
=> if (l.features.contains(.reduce_one_elem_to_bitcast)) done: {
const reduce = data[@intFromEnum(inst)].reduce;
const vector_ty = l.air.typeOf(reduce.operand, ip);
const reduce = l.air_instructions.items(.data)[@intFromEnum(inst)].reduce;
const vector_ty = l.typeOf(reduce.operand);
switch (vector_ty.vectorLen(zcu)) {
0 => unreachable,
1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{
@@ -96,52 +174,248 @@ fn legalizeBody(l: *Legalize, body: []const Air.Inst.Index) std.mem.Allocator.Er
},
.@"try", .try_cold => {
const pl_op = data[@intFromEnum(inst)].pl_op;
const extra = l.air.extraData(Air.Try, pl_op.payload);
try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.body_len]));
const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
const extra = l.extraData(Air.Try, pl_op.payload);
try l.legalizeBody(extra.end, extra.data.body_len);
},
.try_ptr, .try_ptr_cold => {
const ty_pl = data[@intFromEnum(inst)].ty_pl;
const extra = l.air.extraData(Air.TryPtr, ty_pl.payload);
try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.body_len]));
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = l.extraData(Air.TryPtr, ty_pl.payload);
try l.legalizeBody(extra.end, extra.data.body_len);
},
.block, .loop => {
const ty_pl = data[@intFromEnum(inst)].ty_pl;
const extra = l.air.extraData(Air.Block, ty_pl.payload);
try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.body_len]));
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = l.extraData(Air.Block, ty_pl.payload);
try l.legalizeBody(extra.end, extra.data.body_len);
},
.dbg_inline_block => {
const ty_pl = data[@intFromEnum(inst)].ty_pl;
const extra = l.air.extraData(Air.DbgInlineBlock, ty_pl.payload);
try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.body_len]));
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
const extra = l.extraData(Air.DbgInlineBlock, ty_pl.payload);
try l.legalizeBody(extra.end, extra.data.body_len);
},
.cond_br => {
const pl_op = data[@intFromEnum(inst)].pl_op;
const extra = l.air.extraData(Air.CondBr, pl_op.payload);
try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.then_body_len]));
try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end + extra.data.then_body_len ..][0..extra.data.else_body_len]));
const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
const extra = l.extraData(Air.CondBr, pl_op.payload);
try l.legalizeBody(extra.end, extra.data.then_body_len);
try l.legalizeBody(extra.end + extra.data.then_body_len, extra.data.else_body_len);
},
.switch_br, .loop_switch_br => {
const switch_br = l.air.unwrapSwitch(inst);
var it = switch_br.iterateCases();
while (it.next()) |case| try l.legalizeBody(case.body);
try l.legalizeBody(it.elseBody());
const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
const extra = l.extraData(Air.SwitchBr, pl_op.payload);
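// The per-case branch hints (plus one for the else branch) are packed ten
// to a u32 "bag" in extra, hence the divCeil by 10 below: skip past the
// hint bags to reach the first case payload.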
const hint_bag_count = std.math.divCeil(usize, extra.data.cases_len + 1, 10) catch unreachable;
var extra_index = extra.end + hint_bag_count;
for (0..extra.data.cases_len) |_| {
const case_extra = l.extraData(Air.SwitchBr.Case, extra_index);
const case_body_start = case_extra.end + case_extra.data.items_len + case_extra.data.ranges_len * 2;
try l.legalizeBody(case_body_start, case_extra.data.body_len);
extra_index = case_body_start + case_extra.data.body_len;
}
try l.legalizeBody(extra_index, extra.data.else_body_len);
},
};
}
}
}
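A note on control flow above: `scalarizeUnary` and `replaceInst` return the instruction's new tag precisely so that `continue :inst` can re-dispatch the labeled switch on the rewritten instruction, letting one legalization feed into the next. A minimal standalone illustration of that labeled-switch pattern (not compiler code):

const Tag = enum { a, b };
fn redispatch(start: Tag) u32 {
    return sw: switch (start) {
        // Rewrite `.a` to `.b` and re-enter the switch with the new value.
        .a => continue :sw .b,
        .b => 42,
    };
}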
const UnaryDataTag = enum { un_op, ty_op };
inline fn scalarizeUnary(l: *Legalize, inst: Air.Inst.Index, data_tag: UnaryDataTag, un_op: Air.Inst.Ref) Error!Air.Inst.Tag {
return l.replaceInst(inst, .block, try l.scalarizeUnaryBlockPayload(inst, data_tag, un_op));
}
fn scalarizeUnaryBlockPayload(
l: *Legalize,
inst: Air.Inst.Index,
data_tag: UnaryDataTag,
un_op: Air.Inst.Ref,
) Error!Air.Inst.Data {
const pt = l.pt;
const zcu = pt.zcu;
const gpa = zcu.gpa;
const res_ty = l.typeOfIndex(inst);
try l.air_instructions.ensureUnusedCapacity(gpa, 15);
const res_alloc_inst = l.addInstAssumeCapacity(.{
.tag = .alloc,
.data = .{ .ty = try pt.singleMutPtrType(res_ty) },
});
const index_alloc_inst = l.addInstAssumeCapacity(.{
.tag = .alloc,
.data = .{ .ty = try pt.singleMutPtrType(.usize) },
});
const index_init_inst = l.addInstAssumeCapacity(.{
.tag = .store,
.data = .{ .bin_op = .{
.lhs = index_alloc_inst.toRef(),
.rhs = try pt.intRef(.usize, 0),
} },
});
const cur_index_inst = l.addInstAssumeCapacity(.{
.tag = .load,
.data = .{ .ty_op = .{
.ty = .usize_type,
.operand = index_alloc_inst.toRef(),
} },
});
const get_elem_inst = l.addInstAssumeCapacity(.{
.tag = .array_elem_val,
.data = .{ .bin_op = .{
.lhs = un_op,
.rhs = cur_index_inst.toRef(),
} },
});
const op_elem_inst = l.addInstAssumeCapacity(.{
.tag = l.air_instructions.items(.tag)[@intFromEnum(inst)],
.data = switch (data_tag) {
.un_op => .{ .un_op = get_elem_inst.toRef() },
.ty_op => .{ .ty_op = .{
.ty = Air.internedToRef(res_ty.scalarType(zcu).toIntern()),
.operand = get_elem_inst.toRef(),
} },
},
});
const set_elem_inst = l.addInstAssumeCapacity(.{
.tag = .vector_store_elem,
.data = .{ .vector_store_elem = .{
.vector_ptr = res_alloc_inst.toRef(),
.payload = try l.addExtra(Air.Bin, .{
.lhs = cur_index_inst.toRef(),
.rhs = op_elem_inst.toRef(),
}),
} },
});
const not_done_inst = l.addInstAssumeCapacity(.{
.tag = .cmp_lt,
.data = .{ .bin_op = .{
.lhs = cur_index_inst.toRef(),
.rhs = try pt.intRef(.usize, res_ty.vectorLen(zcu)),
} },
});
const next_index_inst = l.addInstAssumeCapacity(.{
.tag = .add,
.data = .{ .bin_op = .{
.lhs = cur_index_inst.toRef(),
.rhs = try pt.intRef(.usize, 1),
} },
});
const set_index_inst = l.addInstAssumeCapacity(.{
.tag = .store,
.data = .{ .bin_op = .{
.lhs = index_alloc_inst.toRef(),
.rhs = next_index_inst.toRef(),
} },
});
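// Four more instructions (repeat, final_res, br_res, done_br) are appended
// below before the loop itself, so the loop's index is known up front.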
const loop_inst: Air.Inst.Index = @enumFromInt(l.air_instructions.len + 4);
const repeat_inst = l.addInstAssumeCapacity(.{
.tag = .repeat,
.data = .{ .repeat = .{ .loop_inst = loop_inst } },
});
const final_res_inst = l.addInstAssumeCapacity(.{
.tag = .load,
.data = .{ .ty_op = .{
.ty = Air.internedToRef(res_ty.toIntern()),
.operand = res_alloc_inst.toRef(),
} },
});
const br_res_inst = l.addInstAssumeCapacity(.{
.tag = .br,
.data = .{ .br = .{
.block_inst = inst,
.operand = final_res_inst.toRef(),
} },
});
const done_br_inst = l.addInstAssumeCapacity(.{
.tag = .cond_br,
.data = .{ .pl_op = .{
.operand = not_done_inst.toRef(),
.payload = try l.addCondBrBodies(&.{
next_index_inst,
set_index_inst,
repeat_inst,
}, &.{
final_res_inst,
br_res_inst,
}),
} },
});
assert(loop_inst == l.addInstAssumeCapacity(.{
.tag = .loop,
.data = .{ .ty_pl = .{
.ty = .noreturn_type,
.payload = try l.addBlockBody(&.{
cur_index_inst,
get_elem_inst,
op_elem_inst,
set_elem_inst,
not_done_inst,
done_br_inst,
}),
} },
}));
return .{ .ty_pl = .{
.ty = Air.internedToRef(res_ty.toIntern()),
.payload = try l.addBlockBody(&.{
res_alloc_inst,
index_alloc_inst,
index_init_inst,
loop_inst,
}),
} };
}
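The net effect is an element-wise loop through a result pointer instead of a single vector instruction. As plain Zig, scalarizing a unary operation looks roughly like this (a conceptual sketch, not a transliteration of the AIR above; negation stands in for whichever scalar op is being legalized):

fn scalarizedNeg(comptime n: usize, v: @Vector(n, f32)) @Vector(n, f32) {
    var res: @Vector(n, f32) = undefined; // res_alloc_inst
    var i: usize = 0; // index_alloc_inst + index_init_inst
    while (i < n) : (i += 1) { // loop/repeat, not_done_inst, next_index_inst
        res[i] = -v[i]; // get_elem_inst, op_elem_inst, set_elem_inst
    }
    return res; // final_res_inst + br_res_inst out of the enclosing block
}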
fn addInstAssumeCapacity(l: *Legalize, inst: Air.Inst) Air.Inst.Index {
defer l.air_instructions.appendAssumeCapacity(inst);
return @enumFromInt(l.air_instructions.len);
}
fn addExtra(l: *Legalize, comptime Extra: type, extra: Extra) Error!u32 {
const extra_fields = @typeInfo(Extra).@"struct".fields;
try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, extra_fields.len);
defer inline for (extra_fields) |field| l.air_extra.appendAssumeCapacity(switch (field.type) {
u32 => @field(extra, field.name),
Air.Inst.Ref => @intFromEnum(@field(extra, field.name)),
else => @compileError(@typeName(field.type)),
});
return @intCast(l.air_extra.items.len);
}
fn addBlockBody(l: *Legalize, body: []const Air.Inst.Index) Error!u32 {
try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, 1 + body.len);
defer {
l.air_extra.appendAssumeCapacity(@intCast(body.len));
l.air_extra.appendSliceAssumeCapacity(@ptrCast(body));
}
return @intCast(l.air_extra.items.len);
}
fn addCondBrBodies(l: *Legalize, then_body: []const Air.Inst.Index, else_body: []const Air.Inst.Index) Error!u32 {
try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, 3 + then_body.len + else_body.len);
defer {
l.air_extra.appendSliceAssumeCapacity(&.{
@intCast(then_body.len),
@intCast(else_body.len),
@bitCast(Air.CondBr.BranchHints{
.true = .none,
.false = .none,
.then_cov = .none,
.else_cov = .none,
}),
});
l.air_extra.appendSliceAssumeCapacity(@ptrCast(then_body));
l.air_extra.appendSliceAssumeCapacity(@ptrCast(else_body));
}
return @intCast(l.air_extra.items.len);
}
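All three extra helpers rely on the same defer trick: the return value (the current `air_extra` length) is computed before the deferred appends run, so each returns the offset at which its payload begins. The resulting layouts, matching what `legalizeBody` and the `cond_br` handling read back:

// addExtra:        [ field0, field1, ... ]
// addBlockBody:    [ body_len, body insts... ]
// addCondBrBodies: [ then_body_len, else_body_len, packed BranchHints, then insts..., else insts... ]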
// inline to propagate comptime `tag`s
inline fn replaceInst(l: *Legalize, inst: Air.Inst.Index, tag: Air.Inst.Tag, data: Air.Inst.Data) Air.Inst.Tag {
const ip = &l.zcu.intern_pool;
const orig_ty = if (std.debug.runtime_safety) l.air.typeOfIndex(inst, ip) else {};
l.air.instructions.items(.tag)[@intFromEnum(inst)] = tag;
l.air.instructions.items(.data)[@intFromEnum(inst)] = data;
if (std.debug.runtime_safety) std.debug.assert(l.air.typeOfIndex(inst, ip).toIntern() == orig_ty.toIntern());
const orig_ty = if (std.debug.runtime_safety) l.typeOfIndex(inst) else {};
l.air_instructions.set(@intFromEnum(inst), .{ .tag = tag, .data = data });
if (std.debug.runtime_safety) assert(l.typeOfIndex(inst).toIntern() == orig_ty.toIntern());
return tag;
}
const Air = @import("../Air.zig");
const assert = std.debug.assert;
const codegen = @import("../codegen.zig");
const Legalize = @This();
const std = @import("std");
const Type = @import("../Type.zig");
const Zcu = @import("../Zcu.zig");


@@ -1742,7 +1742,7 @@ pub fn linkerUpdateFunc(pt: Zcu.PerThread, func_index: InternPool.Index, air: *A
}
const backend = target_util.zigBackend(zcu.root_mod.resolved_target.result, zcu.comp.config.use_llvm);
try air.legalize(backend, zcu);
try air.legalize(backend, pt);
var liveness = try Air.Liveness.analyze(gpa, air.*, ip);
defer liveness.deinit(gpa);


@@ -32,10 +32,15 @@ const FrameIndex = bits.FrameIndex;
const InnerError = codegen.CodeGenError || error{OutOfRegisters};
pub const legalize_features: Air.Legalize.Features = .{
pub const legalize_features: Air.Legalize.Features = .init(.{
.scalarize_ctz = true,
.scalarize_popcount = true,
.scalarize_byte_swap = true,
.scalarize_bit_reverse = true,
.remove_shift_vector_rhs_splat = false,
.reduce_one_elem_to_bitcast = true,
};
});
/// Set this to `false` to uncover Sema OPV bugs.
/// https://github.com/ziglang/zig/issues/22419
@@ -63352,14 +63357,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
defer assert(cg.loops.remove(inst));
try cg.genBodyBlock(@ptrCast(cg.air.extra.items[block.end..][0..block.data.body_len]));
},
.repeat => if (use_old) try cg.airRepeat(inst) else {
.repeat => {
const repeat = air_datas[@intFromEnum(inst)].repeat;
const loop = cg.loops.get(repeat.loop_inst).?;
try cg.restoreState(loop.state, &.{}, .{
.emit_instructions = true,
.update_tracking = false,
.resurrect = false,
.close_scope = true,
.close_scope = false,
});
_ = try cg.asmJmpReloc(loop.target);
},
@@ -162356,6 +162361,136 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.each = .{ .once = &.{
.{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .src2w, ._, ._ },
} },
}, .{
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .word } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, .vp_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ },
} },
}, .{
.required_features = .{ .sse4_1, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .word } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, .p_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ },
} },
}, .{
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .word } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.extra_temps = .{
.{ .type = .f16, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.each = .{ .once = &.{
.{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ },
.{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp0w, ._, ._ },
} },
}, .{
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .word } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.extra_temps = .{
.{ .type = .f32, .kind = .mem },
.{ .type = .f16, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.each = .{ .once = &.{
.{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ },
.{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ },
.{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp1w, ._, ._ },
} },
}, .{
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .word } },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, .vp_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ },
} },
}, .{
.required_features = .{ .sse4_1, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .word } },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, .p_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ },
} },
}, .{
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .word } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.extra_temps = .{
.{ .type = .f16, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.each = .{ .once = &.{
.{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ },
.{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp0w, ._, ._ },
} },
}, .{
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .word } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.extra_temps = .{
.{ .type = .f32, .kind = .mem },
.{ .type = .f16, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.each = .{ .once = &.{
.{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ },
.{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ },
.{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp1w, ._, ._ },
} },
}, .{
.src_constraints = .{ .any, .any, .{ .int = .dword } },
.patterns = &.{
@@ -162375,29 +162510,119 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.{ ._, ._, .mov, .leasi(.src0d, .@"4", .src1), .src2d, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{ .{ .int = .qword }, .any },
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .dword } },
.patterns = &.{
.{ .src = .{ .to_mem, .simm32, .simm32 } },
.{ .src = .{ .to_mem, .simm32, .to_gpr } },
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, .v_ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .dword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, ._ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .dword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, .v_ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .dword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, ._ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .any, .any, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .simm32 } },
.{ .src = .{ .to_gpr, .simm32, .to_gpr } },
},
.each = .{ .once = &.{
.{ ._, ._, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2q, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.dst_constraints = .{ .{ .int = .qword }, .any },
.src_constraints = .{ .any, .any, .{ .int = .qword } },
.patterns = &.{
.{ .src = .{ .to_mem, .to_gpr, .simm32 } },
.{ .src = .{ .to_mem, .to_gpr, .to_gpr } },
.{ .src = .{ .to_gpr, .to_gpr, .simm32 } },
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
},
.each = .{ .once = &.{
.{ ._, ._, .mov, .leasi(.src0q, .@"8", .src1), .src2q, ._, ._ },
} },
}, .{
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .qword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, .v_sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .qword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, ._sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .qword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, ._ps, .movl, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .avx, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .qword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, .v_sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .sse2, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .qword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, ._sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ },
} },
}, .{
.required_features = .{ .sse, null, null, null },
.src_constraints = .{ .any, .any, .{ .float = .qword } },
.patterns = &.{
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
},
.each = .{ .once = &.{
.{ ._, ._ps, .movl, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ },
} },
} }) catch |err| switch (err) {
error.SelectFailed => {
const elem_size = cg.typeOf(bin_op.rhs).abiSize(zcu);
while (try ops[0].toBase(false, cg) or
while (try ops[0].toRegClass(true, .general_purpose, cg) or
try ops[1].toRegClass(true, .general_purpose, cg))
{}
const base_reg = ops[0].tracking(cg).short.register.to64();
@@ -162410,11 +162635,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
rhs_reg,
.u(elem_size),
);
try cg.asmRegisterMemory(
.{ ._, .lea },
base_reg,
try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
);
try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{
.base = .{ .reg = base_reg },
.mod = .{ .rm = .{ .index = rhs_reg } },
});
} else if (elem_size > 8) {
try cg.spillEflagsIfOccupied();
try cg.asmRegisterImmediate(
@@ -162422,20 +162646,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
rhs_reg,
.u(std.math.log2_int(u64, elem_size)),
);
try cg.asmRegisterMemory(
.{ ._, .lea },
base_reg,
try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }),
);
} else try cg.asmRegisterMemory(
.{ ._, .lea },
base_reg,
try ops[0].tracking(cg).short.mem(cg, .{
try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{
.base = .{ .reg = base_reg },
.mod = .{ .rm = .{ .index = rhs_reg } },
});
} else try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{
.base = .{ .reg = base_reg },
.mod = .{ .rm = .{
.index = rhs_reg,
.scale = .fromFactor(@intCast(elem_size)),
}),
);
try ops[0].store(&ops[1], .{}, cg);
} },
});
try ops[0].store(&ops[2], .{}, cg);
},
else => |e| return e,
};
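In the `SelectFailed` fallback above, the element address `base + index * elem_size` is materialized in three tiers, presumably because `lea` can only scale an index register by 1, 2, 4, or 8 (the multiply in the first tier is inferred from the surrounding context):

// elem_size not a power of two: index *= elem_size, then lea base, [base + index]
// elem_size a power of two > 8: index <<= log2(elem_size), then lea base, [base + index]
// elem_size in {1, 2, 4, 8}:    lea base, [base + index * elem_size]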
@@ -174453,18 +174675,6 @@ fn airBr(self: *CodeGen, inst: Air.Inst.Index) !void {
try self.freeValue(block_tracking.short);
}
fn airRepeat(self: *CodeGen, inst: Air.Inst.Index) !void {
const loop_inst = self.air.instructions.items(.data)[@intFromEnum(inst)].repeat.loop_inst;
const repeat_info = self.loops.get(loop_inst).?;
try self.restoreState(repeat_info.state, &.{}, .{
.emit_instructions = true,
.update_tracking = false,
.resurrect = false,
.close_scope = true,
});
_ = try self.asmJmpReloc(repeat_info.target);
}
fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void {
@setEvalBranchQuota(1_100);
const pt = self.pt;


@@ -123,12 +123,12 @@ fn vector8() !void {
test "bitReverse vectors u8" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try comptime vector8();
try vector8();
@@ -144,12 +144,12 @@ fn vector16() !void {
test "bitReverse vectors u16" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try comptime vector16();
try vector16();
@@ -165,12 +165,12 @@ fn vector24() !void {
test "bitReverse vectors u24" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try comptime vector24();
try vector24();


@@ -95,12 +95,12 @@ fn vector8() !void {
test "@byteSwap vectors u8" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try comptime vector8();
try vector8();
@@ -116,12 +116,12 @@ fn vector16() !void {
test "@byteSwap vectors u16" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try comptime vector16();
try vector16();
@@ -137,12 +137,12 @@ fn vector24() !void {
test "@byteSwap vectors u24" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try comptime vector24();
try vector24();


@@ -193,12 +193,12 @@ fn testCtz128() !void {
test "@ctz vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try testCtzVectors();
try comptime testCtzVectors();


@@ -77,12 +77,12 @@ fn testPopCountIntegers() !void {
test "@popCount vectors" {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest;
try comptime testPopCountVectors();
try testPopCountVectors();


@@ -4828,6 +4828,7 @@ inline fn ctz(comptime Type: type, rhs: Type) @TypeOf(@ctz(rhs)) {
test ctz {
const test_ctz = unary(ctz, .{});
try test_ctz.testInts();
try test_ctz.testIntVectors();
}
inline fn popCount(comptime Type: type, rhs: Type) @TypeOf(@popCount(rhs)) {
@@ -4836,6 +4837,7 @@ inline fn popCount(comptime Type: type, rhs: Type) @TypeOf(@popCount(rhs)) {
test popCount {
const test_pop_count = unary(popCount, .{});
try test_pop_count.testInts();
try test_pop_count.testIntVectors();
}
inline fn byteSwap(comptime Type: type, rhs: Type) RoundBitsUp(Type, 8) {
@@ -4844,6 +4846,7 @@ inline fn byteSwap(comptime Type: type, rhs: Type) RoundBitsUp(Type, 8) {
test byteSwap {
const test_byte_swap = unary(byteSwap, .{});
try test_byte_swap.testInts();
try test_byte_swap.testIntVectors();
}
inline fn bitReverse(comptime Type: type, rhs: Type) @TypeOf(@bitReverse(rhs)) {
@@ -4852,6 +4855,7 @@ inline fn bitReverse(comptime Type: type, rhs: Type) @TypeOf(@bitReverse(rhs)) {
test bitReverse {
const test_bit_reverse = unary(bitReverse, .{});
try test_bit_reverse.testInts();
try test_bit_reverse.testIntVectors();
}
inline fn sqrt(comptime Type: type, rhs: Type) @TypeOf(@sqrt(rhs)) {