diff --git a/src/Air/Legalize.zig b/src/Air/Legalize.zig index 3d38798aa3..2e8d09da22 100644 --- a/src/Air/Legalize.zig +++ b/src/Air/Legalize.zig @@ -1,21 +1,48 @@ -zcu: *const Zcu, -air: Air, -features: std.enums.EnumSet(Feature), +pt: Zcu.PerThread, +air_instructions: std.MultiArrayList(Air.Inst), +air_extra: std.ArrayListUnmanaged(u32), +features: *const Features, pub const Feature = enum { + scalarize_not, + scalarize_clz, + scalarize_ctz, + scalarize_popcount, + scalarize_byte_swap, + scalarize_bit_reverse, + scalarize_sqrt, + scalarize_sin, + scalarize_cos, + scalarize_tan, + scalarize_exp, + scalarize_exp2, + scalarize_log, + scalarize_log2, + scalarize_log10, + scalarize_abs, + scalarize_floor, + scalarize_ceil, + scalarize_round, + scalarize_trunc_float, + scalarize_neg, + scalarize_neg_optimized, + /// Legalize (shift lhs, (splat rhs)) -> (shift lhs, rhs) remove_shift_vector_rhs_splat, /// Legalize reduce of a one element vector to a bitcast reduce_one_elem_to_bitcast, }; -pub const Features = std.enums.EnumFieldStruct(Feature, bool, false); +pub const Features = std.enums.EnumSet(Feature); -pub fn legalize(air: *Air, backend: std.builtin.CompilerBackend, zcu: *const Zcu) std.mem.Allocator.Error!void { +pub const Error = std.mem.Allocator.Error; + +pub fn legalize(air: *Air, backend: std.builtin.CompilerBackend, pt: Zcu.PerThread) Error!void { var l: Legalize = .{ - .zcu = zcu, - .air = air.*, - .features = features: switch (backend) { + .pt = pt, + .air_instructions = air.instructions.toMultiArrayList(), + .air_extra = air.extra, + .features = &features: switch (backend) { .other, .stage1 => unreachable, inline .stage2_llvm, .stage2_c, @@ -30,118 +57,365 @@ pub fn legalize(air: *Air, backend: std.builtin.CompilerBackend, zcu: *const Zcu .stage2_powerpc, => |ct_backend| { const Backend = codegen.importBackend(ct_backend) orelse break :features .initEmpty(); - break :features if (@hasDecl(Backend, "legalize_features")) - .init(Backend.legalize_features) - 
else - .initEmpty(); + break :features if (@hasDecl(Backend, "legalize_features")) Backend.legalize_features else .initEmpty(); }, _ => unreachable, }, }; - defer air.* = l.air; - if (!l.features.bits.eql(.initEmpty())) try l.legalizeBody(l.air.getMainBody()); + if (l.features.bits.eql(.initEmpty())) return; + defer air.* = l.getTmpAir(); + const main_extra = l.extraData(Air.Block, l.air_extra.items[@intFromEnum(Air.ExtraIndex.main_block)]); + try l.legalizeBody(main_extra.end, main_extra.data.body_len); } -fn legalizeBody(l: *Legalize, body: []const Air.Inst.Index) std.mem.Allocator.Error!void { - const zcu = l.zcu; - const ip = &zcu.intern_pool; - const tags = l.air.instructions.items(.tag); - const data = l.air.instructions.items(.data); - for (body) |inst| inst: switch (tags[@intFromEnum(inst)]) { - else => {}, - - .shl, - .shl_exact, - .shl_sat, - .shr, - .shr_exact, - => |air_tag| if (l.features.contains(.remove_shift_vector_rhs_splat)) done: { - const bin_op = data[@intFromEnum(inst)].bin_op; - const ty = l.air.typeOf(bin_op.rhs, ip); - if (!ty.isVector(zcu)) break :done; - if (bin_op.rhs.toInterned()) |rhs_ip_index| switch (ip.indexToKey(rhs_ip_index)) { - else => {}, - .aggregate => |aggregate| switch (aggregate.storage) { - else => {}, - .repeated_elem => |splat| continue :inst l.replaceInst(inst, air_tag, .{ .bin_op = .{ - .lhs = bin_op.lhs, - .rhs = Air.internedToRef(splat), - } }), - }, - } else { - const rhs_inst = bin_op.rhs.toIndex().?; - switch (tags[@intFromEnum(rhs_inst)]) { - else => {}, - .splat => continue :inst l.replaceInst(inst, air_tag, .{ .bin_op = .{ - .lhs = bin_op.lhs, - .rhs = data[@intFromEnum(rhs_inst)].ty_op.operand, - } }), - } - } - }, - - .reduce, - .reduce_optimized, - => if (l.features.contains(.reduce_one_elem_to_bitcast)) done: { - const reduce = data[@intFromEnum(inst)].reduce; - const vector_ty = l.air.typeOf(reduce.operand, ip); - switch (vector_ty.vectorLen(zcu)) { - 0 => unreachable, - 1 => continue :inst 
l.replaceInst(inst, .bitcast, .{ .ty_op = .{ - .ty = Air.internedToRef(vector_ty.scalarType(zcu).toIntern()), - .operand = reduce.operand, - } }), - else => break :done, - } - }, - - .@"try", .try_cold => { - const pl_op = data[@intFromEnum(inst)].pl_op; - const extra = l.air.extraData(Air.Try, pl_op.payload); - try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.body_len])); - }, - .try_ptr, .try_ptr_cold => { - const ty_pl = data[@intFromEnum(inst)].ty_pl; - const extra = l.air.extraData(Air.TryPtr, ty_pl.payload); - try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.body_len])); - }, - .block, .loop => { - const ty_pl = data[@intFromEnum(inst)].ty_pl; - const extra = l.air.extraData(Air.Block, ty_pl.payload); - try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.body_len])); - }, - .dbg_inline_block => { - const ty_pl = data[@intFromEnum(inst)].ty_pl; - const extra = l.air.extraData(Air.DbgInlineBlock, ty_pl.payload); - try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.body_len])); - }, - .cond_br => { - const pl_op = data[@intFromEnum(inst)].pl_op; - const extra = l.air.extraData(Air.CondBr, pl_op.payload); - try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end..][0..extra.data.then_body_len])); - try l.legalizeBody(@ptrCast(l.air.extra.items[extra.end + extra.data.then_body_len ..][0..extra.data.else_body_len])); - }, - .switch_br, .loop_switch_br => { - const switch_br = l.air.unwrapSwitch(inst); - var it = switch_br.iterateCases(); - while (it.next()) |case| try l.legalizeBody(case.body); - try l.legalizeBody(it.elseBody()); - }, +fn getTmpAir(l: *const Legalize) Air { + return .{ + .instructions = l.air_instructions.slice(), + .extra = l.air_extra, }; } +fn typeOf(l: *const Legalize, ref: Air.Inst.Ref) Type { + return l.getTmpAir().typeOf(ref, &l.pt.zcu.intern_pool); +} + +fn typeOfIndex(l: *const Legalize, inst: Air.Inst.Index) Type { + return 
l.getTmpAir().typeOfIndex(inst, &l.pt.zcu.intern_pool); +} + +fn extraData(l: *const Legalize, comptime T: type, index: usize) @TypeOf(Air.extraData(undefined, T, undefined)) { + return l.getTmpAir().extraData(T, index); +} + +fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void { + const zcu = l.pt.zcu; + const ip = &zcu.intern_pool; + for (body_start..body_start + body_len) |inst_extra_index| { + const inst: Air.Inst.Index = @enumFromInt(l.air_extra.items[inst_extra_index]); + inst: switch (l.air_instructions.items(.tag)[@intFromEnum(inst)]) { + else => {}, + + inline .not, + .clz, + .ctz, + .popcount, + .byte_swap, + .bit_reverse, + .abs, + => |air_tag| if (l.features.contains(@field(Feature, "scalarize_" ++ @tagName(air_tag)))) done: { + const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op; + if (!ty_op.ty.toType().isVector(zcu)) break :done; + continue :inst try l.scalarizeUnary(inst, .ty_op, ty_op.operand); + }, + inline .sqrt, + .sin, + .cos, + .tan, + .exp, + .exp2, + .log, + .log2, + .log10, + .floor, + .ceil, + .round, + .trunc_float, + .neg, + .neg_optimized, + => |air_tag| if (l.features.contains(@field(Feature, "scalarize_" ++ @tagName(air_tag)))) done: { + const un_op = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op; + if (!l.typeOf(un_op).isVector(zcu)) break :done; + continue :inst try l.scalarizeUnary(inst, .un_op, un_op); + }, + + .shl, + .shl_exact, + .shl_sat, + .shr, + .shr_exact, + => |air_tag| if (l.features.contains(.remove_shift_vector_rhs_splat)) done: { + const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op; + const ty = l.typeOf(bin_op.rhs); + if (!ty.isVector(zcu)) break :done; + if (bin_op.rhs.toInterned()) |rhs_ip_index| switch (ip.indexToKey(rhs_ip_index)) { + else => {}, + .aggregate => |aggregate| switch (aggregate.storage) { + else => {}, + .repeated_elem => |splat| continue :inst l.replaceInst(inst, air_tag, .{ .bin_op = .{ + .lhs = bin_op.lhs, + .rhs = 
Air.internedToRef(splat), + } }), + }, + } else { + const rhs_inst = bin_op.rhs.toIndex().?; + switch (l.air_instructions.items(.tag)[@intFromEnum(rhs_inst)]) { + else => {}, + .splat => continue :inst l.replaceInst(inst, air_tag, .{ .bin_op = .{ + .lhs = bin_op.lhs, + .rhs = l.air_instructions.items(.data)[@intFromEnum(rhs_inst)].ty_op.operand, + } }), + } + } + }, + + .reduce, + .reduce_optimized, + => if (l.features.contains(.reduce_one_elem_to_bitcast)) done: { + const reduce = l.air_instructions.items(.data)[@intFromEnum(inst)].reduce; + const vector_ty = l.typeOf(reduce.operand); + switch (vector_ty.vectorLen(zcu)) { + 0 => unreachable, + 1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{ + .ty = Air.internedToRef(vector_ty.scalarType(zcu).toIntern()), + .operand = reduce.operand, + } }), + else => break :done, + } + }, + + .@"try", .try_cold => { + const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; + const extra = l.extraData(Air.Try, pl_op.payload); + try l.legalizeBody(extra.end, extra.data.body_len); + }, + .try_ptr, .try_ptr_cold => { + const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = l.extraData(Air.TryPtr, ty_pl.payload); + try l.legalizeBody(extra.end, extra.data.body_len); + }, + .block, .loop => { + const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = l.extraData(Air.Block, ty_pl.payload); + try l.legalizeBody(extra.end, extra.data.body_len); + }, + .dbg_inline_block => { + const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = l.extraData(Air.DbgInlineBlock, ty_pl.payload); + try l.legalizeBody(extra.end, extra.data.body_len); + }, + .cond_br => { + const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; + const extra = l.extraData(Air.CondBr, pl_op.payload); + try l.legalizeBody(extra.end, extra.data.then_body_len); + try l.legalizeBody(extra.end + extra.data.then_body_len, 
extra.data.else_body_len); + }, + .switch_br, .loop_switch_br => { + const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op; + const extra = l.extraData(Air.SwitchBr, pl_op.payload); + const hint_bag_count = std.math.divCeil(usize, extra.data.cases_len + 1, 10) catch unreachable; + var extra_index = extra.end + hint_bag_count; + for (0..extra.data.cases_len) |_| { + const case_extra = l.extraData(Air.SwitchBr.Case, extra_index); + const case_body_start = case_extra.end + case_extra.data.items_len + case_extra.data.ranges_len * 2; + try l.legalizeBody(case_body_start, case_extra.data.body_len); + extra_index = case_body_start + case_extra.data.body_len; + } + try l.legalizeBody(extra_index, extra.data.else_body_len); + }, + } + } +} + +const UnaryDataTag = enum { un_op, ty_op }; +inline fn scalarizeUnary(l: *Legalize, inst: Air.Inst.Index, data_tag: UnaryDataTag, un_op: Air.Inst.Ref) Error!Air.Inst.Tag { + return l.replaceInst(inst, .block, try l.scalarizeUnaryBlockPayload(inst, data_tag, un_op)); +} +fn scalarizeUnaryBlockPayload( + l: *Legalize, + inst: Air.Inst.Index, + data_tag: UnaryDataTag, + un_op: Air.Inst.Ref, +) Error!Air.Inst.Data { + const pt = l.pt; + const zcu = pt.zcu; + const gpa = zcu.gpa; + + const res_ty = l.typeOfIndex(inst); + try l.air_instructions.ensureUnusedCapacity(gpa, 15); + const res_alloc_inst = l.addInstAssumeCapacity(.{ + .tag = .alloc, + .data = .{ .ty = try pt.singleMutPtrType(res_ty) }, + }); + const index_alloc_inst = l.addInstAssumeCapacity(.{ + .tag = .alloc, + .data = .{ .ty = try pt.singleMutPtrType(.usize) }, + }); + const index_init_inst = l.addInstAssumeCapacity(.{ + .tag = .store, + .data = .{ .bin_op = .{ + .lhs = index_alloc_inst.toRef(), + .rhs = try pt.intRef(.usize, 0), + } }, + }); + const cur_index_inst = l.addInstAssumeCapacity(.{ + .tag = .load, + .data = .{ .ty_op = .{ + .ty = .usize_type, + .operand = index_alloc_inst.toRef(), + } }, + }); + const get_elem_inst = l.addInstAssumeCapacity(.{ + 
.tag = .array_elem_val, + .data = .{ .bin_op = .{ + .lhs = un_op, + .rhs = cur_index_inst.toRef(), + } }, + }); + const op_elem_inst = l.addInstAssumeCapacity(.{ + .tag = l.air_instructions.items(.tag)[@intFromEnum(inst)], + .data = switch (data_tag) { + .un_op => .{ .un_op = get_elem_inst.toRef() }, + .ty_op => .{ .ty_op = .{ + .ty = Air.internedToRef(res_ty.scalarType(zcu).toIntern()), + .operand = get_elem_inst.toRef(), + } }, + }, + }); + const set_elem_inst = l.addInstAssumeCapacity(.{ + .tag = .vector_store_elem, + .data = .{ .vector_store_elem = .{ + .vector_ptr = res_alloc_inst.toRef(), + .payload = try l.addExtra(Air.Bin, .{ + .lhs = cur_index_inst.toRef(), + .rhs = op_elem_inst.toRef(), + }), + } }, + }); + const not_done_inst = l.addInstAssumeCapacity(.{ + .tag = .cmp_lt, + .data = .{ .bin_op = .{ + .lhs = cur_index_inst.toRef(), + .rhs = try pt.intRef(.usize, res_ty.vectorLen(zcu) - 1), // element cur_index is already processed above; repeat only while a later element remains + } }, + }); + const next_index_inst = l.addInstAssumeCapacity(.{ + .tag = .add, + .data = .{ .bin_op = .{ + .lhs = cur_index_inst.toRef(), + .rhs = try pt.intRef(.usize, 1), + } }, + }); + const set_index_inst = l.addInstAssumeCapacity(.{ + .tag = .store, + .data = .{ .bin_op = .{ + .lhs = index_alloc_inst.toRef(), + .rhs = next_index_inst.toRef(), + } }, + }); + const loop_inst: Air.Inst.Index = @enumFromInt(l.air_instructions.len + 4); + const repeat_inst = l.addInstAssumeCapacity(.{ + .tag = .repeat, + .data = .{ .repeat = .{ .loop_inst = loop_inst } }, + }); + const final_res_inst = l.addInstAssumeCapacity(.{ + .tag = .load, + .data = .{ .ty_op = .{ + .ty = Air.internedToRef(res_ty.toIntern()), + .operand = res_alloc_inst.toRef(), + } }, + }); + const br_res_inst = l.addInstAssumeCapacity(.{ + .tag = .br, + .data = .{ .br = .{ + .block_inst = inst, + .operand = final_res_inst.toRef(), + } }, + }); + const done_br_inst = l.addInstAssumeCapacity(.{ + .tag = .cond_br, + .data = .{ .pl_op = .{ + .operand = not_done_inst.toRef(), + .payload = try l.addCondBrBodies(&.{ +
next_index_inst, + set_index_inst, + repeat_inst, + }, &.{ + final_res_inst, + br_res_inst, + }), + } }, + }); + assert(loop_inst == l.addInstAssumeCapacity(.{ + .tag = .loop, + .data = .{ .ty_pl = .{ + .ty = .noreturn_type, + .payload = try l.addBlockBody(&.{ + cur_index_inst, + get_elem_inst, + op_elem_inst, + set_elem_inst, + not_done_inst, + done_br_inst, + }), + } }, + })); + return .{ .ty_pl = .{ + .ty = Air.internedToRef(res_ty.toIntern()), + .payload = try l.addBlockBody(&.{ + res_alloc_inst, + index_alloc_inst, + index_init_inst, + loop_inst, + }), + } }; +} + +fn addInstAssumeCapacity(l: *Legalize, inst: Air.Inst) Air.Inst.Index { + defer l.air_instructions.appendAssumeCapacity(inst); + return @enumFromInt(l.air_instructions.len); +} + +fn addExtra(l: *Legalize, comptime Extra: type, extra: Extra) Error!u32 { + const extra_fields = @typeInfo(Extra).@"struct".fields; + try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, extra_fields.len); + defer inline for (extra_fields) |field| l.air_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + Air.Inst.Ref => @intFromEnum(@field(extra, field.name)), + else => @compileError(@typeName(field.type)), + }); + return @intCast(l.air_extra.items.len); +} + +fn addBlockBody(l: *Legalize, body: []const Air.Inst.Index) Error!u32 { + try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, 1 + body.len); + defer { + l.air_extra.appendAssumeCapacity(@intCast(body.len)); + l.air_extra.appendSliceAssumeCapacity(@ptrCast(body)); + } + return @intCast(l.air_extra.items.len); +} + +fn addCondBrBodies(l: *Legalize, then_body: []const Air.Inst.Index, else_body: []const Air.Inst.Index) Error!u32 { + try l.air_extra.ensureUnusedCapacity(l.pt.zcu.gpa, 3 + then_body.len + else_body.len); + defer { + l.air_extra.appendSliceAssumeCapacity(&.{ + @intCast(then_body.len), + @intCast(else_body.len), + @bitCast(Air.CondBr.BranchHints{ + .true = .none, + .false = .none, + .then_cov = .none, + .else_cov = .none, + }), 
+ }); + l.air_extra.appendSliceAssumeCapacity(@ptrCast(then_body)); + l.air_extra.appendSliceAssumeCapacity(@ptrCast(else_body)); + } + return @intCast(l.air_extra.items.len); +} + // inline to propagate comptime `tag`s inline fn replaceInst(l: *Legalize, inst: Air.Inst.Index, tag: Air.Inst.Tag, data: Air.Inst.Data) Air.Inst.Tag { - const ip = &l.zcu.intern_pool; - const orig_ty = if (std.debug.runtime_safety) l.air.typeOfIndex(inst, ip) else {}; - l.air.instructions.items(.tag)[@intFromEnum(inst)] = tag; - l.air.instructions.items(.data)[@intFromEnum(inst)] = data; - if (std.debug.runtime_safety) std.debug.assert(l.air.typeOfIndex(inst, ip).toIntern() == orig_ty.toIntern()); + const orig_ty = if (std.debug.runtime_safety) l.typeOfIndex(inst) else {}; + l.air_instructions.set(@intFromEnum(inst), .{ .tag = tag, .data = data }); + if (std.debug.runtime_safety) assert(l.typeOfIndex(inst).toIntern() == orig_ty.toIntern()); return tag; } const Air = @import("../Air.zig"); +const assert = std.debug.assert; const codegen = @import("../codegen.zig"); const Legalize = @This(); const std = @import("std"); +const Type = @import("../Type.zig"); const Zcu = @import("../Zcu.zig"); diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig index 44abb3cbf3..80d480bb7c 100644 --- a/src/Zcu/PerThread.zig +++ b/src/Zcu/PerThread.zig @@ -1742,7 +1742,7 @@ pub fn linkerUpdateFunc(pt: Zcu.PerThread, func_index: InternPool.Index, air: *A } const backend = target_util.zigBackend(zcu.root_mod.resolved_target.result, zcu.comp.config.use_llvm); - try air.legalize(backend, zcu); + try air.legalize(backend, pt); var liveness = try Air.Liveness.analyze(gpa, air.*, ip); defer liveness.deinit(gpa); diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 0b2a17d192..63d6730732 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -32,10 +32,15 @@ const FrameIndex = bits.FrameIndex; const InnerError = codegen.CodeGenError || error{OutOfRegisters}; -pub 
const legalize_features: Air.Legalize.Features = .{ +pub const legalize_features: Air.Legalize.Features = .init(.{ + .scalarize_ctz = true, + .scalarize_popcount = true, + .scalarize_byte_swap = true, + .scalarize_bit_reverse = true, + .remove_shift_vector_rhs_splat = false, .reduce_one_elem_to_bitcast = true, -}; +}); /// Set this to `false` to uncover Sema OPV bugs. /// https://github.com/ziglang/zig/issues/22419 @@ -63352,14 +63357,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { defer assert(cg.loops.remove(inst)); try cg.genBodyBlock(@ptrCast(cg.air.extra.items[block.end..][0..block.data.body_len])); }, - .repeat => if (use_old) try cg.airRepeat(inst) else { + .repeat => { const repeat = air_datas[@intFromEnum(inst)].repeat; const loop = cg.loops.get(repeat.loop_inst).?; try cg.restoreState(loop.state, &.{}, .{ .emit_instructions = true, .update_tracking = false, .resurrect = false, - .close_scope = true, + .close_scope = false, }); _ = try cg.asmJmpReloc(loop.target); }, @@ -162356,6 +162361,136 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .each = .{ .once = &.{ .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .src2w, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .vp_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .p_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, 
.simm32, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .each = .{ .once = &.{ + .{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ }, + .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .f32, .kind = .mem }, + .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .each = .{ .once = &.{ + .{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ }, + .{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp1w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .vp_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .p_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .each = .{ 
.once = &.{ + .{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ }, + .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp0w, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .word } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .f32, .kind = .mem }, + .{ .type = .f16, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .each = .{ .once = &.{ + .{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ }, + .{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp1w, ._, ._ }, + } }, }, .{ .src_constraints = .{ .any, .any, .{ .int = .dword } }, .patterns = &.{ @@ -162375,29 +162510,119 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .leasi(.src0d, .@"4", .src1), .src2d, ._, ._ }, } }, }, .{ - .required_features = .{ .@"64bit", null, null, null }, - .dst_constraints = .{ .{ .int = .qword }, .any }, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .dword } }, .patterns = &.{ - .{ .src = .{ .to_mem, .simm32, .simm32 } }, - .{ .src = .{ .to_mem, .simm32, .to_gpr } }, + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .v_ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + 
.each = .{ .once = &.{ + .{ ._, .v_ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .dword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .any, .any, .{ .int = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .simm32 } }, + .{ .src = .{ .to_gpr, .simm32, .to_gpr } }, }, .each = .{ .once = &.{ .{ ._, ._, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2q, ._, ._ }, } }, }, .{ .required_features = .{ .@"64bit", null, null, null }, - .dst_constraints = .{ .{ .int = .qword }, .any }, + .src_constraints = .{ .any, .any, .{ .int = .qword } }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_gpr, .simm32 } }, - .{ .src = .{ .to_mem, .to_gpr, .to_gpr } }, + .{ .src = .{ .to_gpr, .to_gpr, .simm32 } }, + .{ .src = .{ .to_gpr, .to_gpr, .to_gpr } }, }, .each = .{ .once = &.{ .{ ._, ._, .mov, .leasi(.src0q, .@"8", .src1), .src2q, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .v_sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .simm32, .to_sse } }, + 
}, + .each = .{ .once = &.{ + .{ ._, ._ps, .movl, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, .v_sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .any, .any, .{ .float = .qword } }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .to_sse } }, + }, + .each = .{ .once = &.{ + .{ ._, ._ps, .movl, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ }, + } }, } }) catch |err| switch (err) { error.SelectFailed => { const elem_size = cg.typeOf(bin_op.rhs).abiSize(zcu); - while (try ops[0].toBase(false, cg) or + while (try ops[0].toRegClass(true, .general_purpose, cg) or try ops[1].toRegClass(true, .general_purpose, cg)) {} const base_reg = ops[0].tracking(cg).short.register.to64(); @@ -162410,11 +162635,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { rhs_reg, .u(elem_size), ); - try cg.asmRegisterMemory( - .{ ._, .lea }, - base_reg, - try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }), - ); + try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ + .base = .{ .reg = base_reg }, + .mod = .{ .rm = .{ .index = rhs_reg } }, + }); } else if (elem_size > 8) { try cg.spillEflagsIfOccupied(); try cg.asmRegisterImmediate( @@ -162422,20 +162646,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { rhs_reg, .u(std.math.log2_int(u64, elem_size)), ); - try cg.asmRegisterMemory( - .{ ._, .lea }, - 
base_reg, - try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }), - ); - } else try cg.asmRegisterMemory( - .{ ._, .lea }, - base_reg, - try ops[0].tracking(cg).short.mem(cg, .{ + try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ + .base = .{ .reg = base_reg }, + .mod = .{ .rm = .{ .index = rhs_reg } }, + }); + } else try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{ + .base = .{ .reg = base_reg }, + .mod = .{ .rm = .{ .index = rhs_reg, .scale = .fromFactor(@intCast(elem_size)), - }), - ); - try ops[0].store(&ops[1], .{}, cg); + } }, + }); + try ops[0].store(&ops[2], .{}, cg); }, else => |e| return e, }; @@ -174453,18 +174675,6 @@ fn airBr(self: *CodeGen, inst: Air.Inst.Index) !void { try self.freeValue(block_tracking.short); } -fn airRepeat(self: *CodeGen, inst: Air.Inst.Index) !void { - const loop_inst = self.air.instructions.items(.data)[@intFromEnum(inst)].repeat.loop_inst; - const repeat_info = self.loops.get(loop_inst).?; - try self.restoreState(repeat_info.state, &.{}, .{ - .emit_instructions = true, - .update_tracking = false, - .resurrect = false, - .close_scope = true, - }); - _ = try self.asmJmpReloc(repeat_info.target); -} - fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { @setEvalBranchQuota(1_100); const pt = self.pt; diff --git a/test/behavior/bitreverse.zig b/test/behavior/bitreverse.zig index 965a820141..b2666c6fa6 100644 --- a/test/behavior/bitreverse.zig +++ b/test/behavior/bitreverse.zig @@ -123,12 +123,12 @@ fn vector8() !void { test "bitReverse vectors u8" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return 
error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try comptime vector8(); try vector8(); @@ -144,12 +144,12 @@ fn vector16() !void { test "bitReverse vectors u16" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try comptime vector16(); try vector16(); @@ -165,12 +165,12 @@ fn vector24() !void { test "bitReverse vectors u24" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try comptime vector24(); try vector24(); diff --git a/test/behavior/byteswap.zig b/test/behavior/byteswap.zig index 0c6e655b25..8a0357515e 100644 --- a/test/behavior/byteswap.zig +++ b/test/behavior/byteswap.zig @@ -95,12 +95,12 @@ fn vector8() !void { test "@byteSwap vectors u8" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - 
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try comptime vector8(); try vector8(); @@ -116,12 +116,12 @@ fn vector16() !void { test "@byteSwap vectors u16" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try comptime vector16(); try vector16(); @@ -137,12 +137,12 @@ fn vector24() !void { test "@byteSwap vectors u24" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and 
builtin.target.ofmt != .macho) return error.SkipZigTest; try comptime vector24(); try vector24(); diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 5cb41aa228..1e4bac12e7 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -193,12 +193,12 @@ fn testCtz128() !void { test "@ctz vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try testCtzVectors(); try comptime testCtzVectors(); diff --git a/test/behavior/popcount.zig b/test/behavior/popcount.zig index 1bf5f96515..29c51308b3 100644 --- a/test/behavior/popcount.zig +++ b/test/behavior/popcount.zig @@ -77,12 +77,12 @@ fn testPopCountIntegers() !void { test "@popCount vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; try comptime testPopCountVectors(); try 
testPopCountVectors(); diff --git a/test/behavior/x86_64/unary.zig b/test/behavior/x86_64/unary.zig index 827d08c4c7..132d17b42d 100644 --- a/test/behavior/x86_64/unary.zig +++ b/test/behavior/x86_64/unary.zig @@ -4828,6 +4828,7 @@ inline fn ctz(comptime Type: type, rhs: Type) @TypeOf(@ctz(rhs)) { test ctz { const test_ctz = unary(ctz, .{}); try test_ctz.testInts(); + try test_ctz.testIntVectors(); } inline fn popCount(comptime Type: type, rhs: Type) @TypeOf(@popCount(rhs)) { @@ -4836,6 +4837,7 @@ inline fn popCount(comptime Type: type, rhs: Type) @TypeOf(@popCount(rhs)) { test popCount { const test_pop_count = unary(popCount, .{}); try test_pop_count.testInts(); + try test_pop_count.testIntVectors(); } inline fn byteSwap(comptime Type: type, rhs: Type) RoundBitsUp(Type, 8) { @@ -4844,6 +4846,7 @@ inline fn byteSwap(comptime Type: type, rhs: Type) RoundBitsUp(Type, 8) { test byteSwap { const test_byte_swap = unary(byteSwap, .{}); try test_byte_swap.testInts(); + try test_byte_swap.testIntVectors(); } inline fn bitReverse(comptime Type: type, rhs: Type) @TypeOf(@bitReverse(rhs)) { @@ -4852,6 +4855,7 @@ inline fn bitReverse(comptime Type: type, rhs: Type) @TypeOf(@bitReverse(rhs)) { test bitReverse { const test_bit_reverse = unary(bitReverse, .{}); try test_bit_reverse.testInts(); + try test_bit_reverse.testIntVectors(); } inline fn sqrt(comptime Type: type, rhs: Type) @TypeOf(@sqrt(rhs)) {