diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 16e73c161a..0adf771efd 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2393,7 +2393,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - @setEvalBranchQuota(3_600); + @setEvalBranchQuota(3_900); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2805,10 +2805,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg); }, - .max => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else fallback: { + .max => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; - const ty = cg.typeOf(bin_op.lhs); - if (ty.isVector(zcu) and cg.floatBits(ty.childType(zcu)) != null) break :fallback try cg.airBinOp(inst, air_tag); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{ @@ -4510,7 +4508,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } }, + .{ .type = .vector_4_u32, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } }, .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, .unused, .unused, @@ -4647,7 +4645,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } }, + .{ .type = .vector_4_u32, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } }, .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, @@ -4967,7 +4965,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .unused, .unused, @@ -4998,7 +4996,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .reg = .xmm0 } }, .unused, @@ -5030,7 +5028,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, .unused, .unused, @@ -5059,7 +5057,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, @@ -5095,7 +5093,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, @@ -5131,7 +5129,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, @@ -5383,8 +5381,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_sse, .to_sse } }, }, .extra_temps = .{ - .{ .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, - .{ .kind = .{ .rc = .sse } }, + .{ .type = .f16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .f16, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -5395,12 +5393,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, .each = .{ .once = &.{ - .{ ._, .v_ps, .cvtph2, .dst0x, .src0x, ._, ._ }, - .{ ._, .v_ps, .cvtph2, .tmp0x, .src1x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ }, .{ ._, .v_ss, .cmp, .tmp1x, .dst0x, .dst0x, .vp(.unord) }, .{ ._, .v_ss, .max, .dst0x, .tmp0x, .dst0x, ._ }, .{ ._, .v_ps, .blendv, .dst0x, .dst0x, .tmp0x, .tmp1x }, - .{ ._, .v_, .cvtps2ph, .dst0x, .dst0x, .rm(.{}), ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, } }, }, .{ .required_features = .{ .sse, null, null, null }, @@ -5423,11 +5421,248 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .qword, .is = .word } }, + .{ .scalar_float = .{ .of = .qword, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .mem, .mem } }, + .{ .src = .{ .to_sse, .mem } }, + .{ .src = .{ .mem, .to_sse } }, + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .vector_4_f16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ }, + .{ ._, .v_ps, .cmp, .tmp1x, .dst0x, .dst0x, .vp(.unord) }, + .{ ._, .v_ps, .max, .dst0x, .tmp0x, .dst0x, ._ }, + .{ ._, .v_ps, .blendv, .dst0x, .dst0x, .tmp0x, .tmp1x }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .word } }, + .{ .scalar_float = .{ .of = .xword, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .mem, .mem } }, + .{ .src = .{ .to_sse, .mem } }, + .{ .src = .{ .mem, .to_sse } }, + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_f16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0y, .src1x, ._, ._ }, + .{ ._, .v_ps, .cmp, .tmp1y, .dst0y, .dst0y, .vp(.unord) }, + .{ ._, .v_ps, .max, .dst0y, .tmp0y, .dst0y, ._ }, + .{ ._, .v_ps, .blendv, .dst0y, .dst0y, .tmp0y, .tmp1y }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0y, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_ps, .cmp, .tmp3y, .tmp1y, .tmp1y, .vp(.unord) }, + .{ ._, .v_ps, .max, .tmp1y, .tmp2y, .tmp1y, ._ }, + .{ ._, .v_ps, .blendv, .tmp1y, .tmp1y, .tmp2y, .tmp3y }, + .{ ._, .v_, .cvtps2ph, .memia(.dst0x, .tmp0, .add_size), .tmp1y, .rm(.{}), ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__fmaxh" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ }, + .{ ._, .vp_w, .insr, .tmp1x, .tmp2x, .memia(.src0w, .tmp0, .add_size), .ui(0) }, + .{ ._, .vp_w, .insr, .tmp2x, .tmp2x, .memia(.src1w, .tmp0, .add_size), .ui(0) }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .vp_w, .extr, .memia(.dst0w, .tmp0, .add_size), .tmp1x, .ui(0), ._ }, + .{ ._, ._, .add, .tmp0q, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__fmaxh" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, .p_w, .insr, .tmp1x, .memia(.src0w, .tmp0, .add_size), .ui(0), ._ }, + .{ ._, .p_w, .insr, .tmp2x, .memia(.src1w, .tmp0, .add_size), .ui(0), ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .p_w, .extr, .memia(.dst0w, .tmp0, .add_size), .tmp1x, .ui(0), ._ }, + .{ ._, ._, .add, .tmp0q, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__fmaxh" } } }, + .{ .type = .f16, .kind = .{ .reg = .ax } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, .p_w, .insr, .tmp1x, .memia(.src0w, .tmp0, .add_size), .ui(0), ._ }, + .{ ._, .p_w, .insr, .tmp2x, .memia(.src1w, .tmp0, .add_size), .ui(0), ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .p_w, .extr, .tmp4d, .tmp1x, .ui(0), ._ }, + .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp4w, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .eax } }, + .{ .type = .f32, .kind = .mem }, + .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__fmaxh" } } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ }, + .{ ._, ._ss, .mov, .tmp3x, .mem(.tmp2d), ._, ._ }, + .{ ._, ._, .movzx, .tmp1d, .memia(.src1w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ }, + .{ ._, ._ss, .mov, .tmp4x, .mem(.tmp2d), ._, ._ }, + .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, + .{ ._, ._ss, .mov, .mem(.tmp2d), .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .mem(.tmp2d), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .src_constraints = .{ @@ -5438,7 +5673,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_sse, .to_sse } }, }, .extra_temps = .{ - .{ .kind = .{ .rc = .sse } }, + .{ .type = .f32, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -5499,6 +5734,210 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ps, .andn, .dst0x, .src1x, ._, ._ }, .{ ._, ._ps, .@"or", .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cmp, .tmp0x, .src0x, .src0x, .vp(.unord) }, + .{ ._, .v_ps, .max, .dst0x, .src1x, .src0x, ._ }, + .{ ._, .v_ps, .blendv, .dst0x, .dst0x, .src1x, .tmp0x }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .{ .to_reg = .xmm0 }, .mem } }, + .{ .src = .{ .mem, .{ .to_reg = .xmm0 } }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .xmm0 }, .to_sse } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._ps, .mova, .dst0x, .src1x, ._, ._ }, + .{ ._, ._ps, .max, .dst0x, .src0x, ._, ._ }, + .{ ._, ._ps, .cmp, .src0x, .src0x, .vp(.unord), ._ }, + .{ ._, ._ps, .blendv, .dst0x, .src1x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .mem } }, + .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._ps, .mova, .tmp0x, .src1x, ._, ._ }, + .{ ._, ._ps, .max, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._ps, .cmp, .dst0x, .src0x, .vp(.ord), ._ }, + .{ ._, ._ps, .@"and", .tmp0x, .dst0x, ._, ._ }, + .{ ._, ._ps, .andn, .dst0x, .src1x, ._, ._ }, + .{ ._, ._ps, .@"or", .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .yword, .is = .dword } }, + .{ .scalar_float = .{ .of = .yword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cmp, .tmp0y, .src0y, .src0y, .vp(.unord) }, + .{ ._, .v_ps, .max, .dst0y, .src1y, .src0y, ._ }, + .{ ._, .v_ps, .blendv, .dst0y, .dst0y, .src1y, .tmp0y }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, + .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_ps, .mova, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_ps, .mova, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_ps, .cmp, .tmp3y, .tmp1y, .tmp1y, .vp(.unord) }, + .{ ._, .v_ps, .max, .tmp1y, .tmp2y, .tmp1y, ._ }, + .{ ._, .v_ps, .blendv, .tmp1y, .tmp1y, .tmp2y, .tmp3y }, + .{ ._, .v_ps, .mova, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .reg = .xmm0 } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp3x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .max, .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .cmp, .tmp1x, .tmp1x, .vp(.unord), ._ }, + .{ ._, ._ps, .blendv, .tmp3x, .tmp2x, .tmp1x, ._ }, + .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp3x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .max, .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .cmp, .tmp1x, .tmp1x, .vp(.ord), ._ }, + .{ ._, ._ps, .@"and", .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .andn, .tmp1x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .@"or", .tmp1x, .tmp3x, ._, ._ }, + .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .src_constraints = .{ @@ -5509,7 +5948,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_sse, .to_sse } }, }, .extra_temps = .{ - .{ .kind = .{ .rc = .sse } }, + .{ .type = .f64, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -5591,11 +6030,249 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, .cmp, .tmp0x, .src0x, .src0x, .vp(.unord) }, + .{ ._, .v_pd, .max, .dst0x, .src1x, .src0x, ._ }, + .{ ._, .v_pd, .blendv, .dst0x, .dst0x, .src1x, .tmp0x }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .{ .to_reg = .xmm0 }, .mem } }, + .{ .src = .{ .mem, .{ .to_reg = .xmm0 } }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .xmm0 }, .to_sse } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._pd, .mova, .dst0x, .src1x, ._, ._ }, + .{ ._, ._pd, .max, .dst0x, .src0x, ._, ._ }, + .{ ._, ._pd, .cmp, .src0x, .src0x, .vp(.unord), ._ }, + .{ ._, ._pd, .blendv, .dst0x, .src1x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .mem } }, + .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._pd, .mova, .tmp0x, .src1x, ._, ._ }, + .{ ._, ._pd, .max, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._pd, .cmp, .dst0x, .src0x, .vp(.ord), ._ }, + .{ ._, ._pd, .@"and", .tmp0x, .dst0x, ._, ._ }, + .{ ._, ._pd, .andn, .dst0x, .src1x, ._, ._ }, + .{ ._, ._pd, .@"or", .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .yword, .is = .qword } }, + .{ .scalar_float = .{ .of = .yword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, .cmp, .tmp0y, .src0y, .src0y, .vp(.unord) }, + .{ ._, .v_pd, .max, .dst0y, .src1y, .src0y, ._ }, + .{ ._, .v_pd, .blendv, .dst0y, .dst0y, .src1y, .tmp0y }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, + .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_pd, .mova, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_pd, .mova, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_pd, .cmp, .tmp3y, .tmp1y, .tmp1y, .vp(.unord) }, + .{ ._, .v_pd, .max, .tmp1y, .tmp2y, .tmp1y, ._ }, + .{ ._, .v_pd, .blendv, .tmp1y, .tmp1y, .tmp2y, .tmp3y }, + .{ ._, .v_pd, .mova, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_f64, .kind = .{ .reg = .xmm0 } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._pd, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .mova, .tmp3x, .tmp2x, ._, ._ }, + .{ ._, ._pd, .max, .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._pd, .cmp, .tmp1x, .tmp1x, .vp(.unord), ._ }, + .{ ._, ._pd, .blendv, .tmp3x, .tmp2x, .tmp1x, ._ }, + .{ ._, ._pd, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._pd, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .mova, .tmp3x, .tmp2x, ._, ._ }, + .{ ._, ._pd, .max, .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._pd, .cmp, .tmp1x, .tmp1x, .vp(.ord), ._ }, + .{ ._, ._pd, .@"and", .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._pd, .andn, .tmp1x, .tmp2x, ._, ._ }, + .{ ._, ._pd, .@"or", .tmp1x, .tmp3x, ._, ._ }, + .{ ._, ._pd, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, + .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f64, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f64, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "fmax" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .movl, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .movl, .tmp2x, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .movl, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, }, .{ .required_features = .{ .x87, .cmov, null, null }, .src_constraints = .{ @@ -5750,6 +6427,172 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_, .ld, .src1t, ._, ._, ._ }, .{ .@"1:", .f_p, .st, .dst0t, ._, ._, ._ }, } }, + }, .{ + .required_features = .{ .x87, .cmov, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ucomi, .tmp1t, .tmp1t, ._, ._ }, + .{ ._, .f_u, .cmov, .tmp1t, .tmp2t, ._, ._ }, + .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ }, + .{ ._, .f_, .ucomi, .tmp1t, .tmp2t, ._, ._ }, + .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ }, + .{ ._, .f_nb, .cmov, .tmp1t, .tmp2t, ._, ._ }, + .{ ._, .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sahf, .x87, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .u8, .kind = .{ .reg = .ah } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp1t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .sahf, ._, ._, ._, ._ }, + .{ ._, ._p, .j, .@"1f", ._, ._, ._ }, + .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .sahf, ._, ._, ._, ._ }, + .{ ._, ._b, .j, .@"2f", ._, ._, ._ }, + .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ }, + .{ .@"2:", .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .x87, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .u8, .kind = .{ .reg = .ah } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .xam, ._, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp3b, .si(0b0_1_000_100), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp3b, .si(0b0_0_000_001), ._, ._ }, + .{ ._, ._nz, .j, .@"2f", ._, ._, ._ }, + .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ }, + .{ .@"2:", .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .x87, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .u8, .kind = .{ .reg = .ah } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp1t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .sahf, ._, ._, ._, ._ }, + .{ ._, ._p, .j, .@"1f", ._, ._, ._ }, + .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, .f_, .xch, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .sahf, ._, ._, ._, ._ }, + .{ ._, ._b, .j, .@"2f", ._, ._, ._ }, + .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ }, + .{ .@"2:", .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, }, .{ .required_features = .{ .sse, null, null, null }, .src_constraints = .{ @@ -5771,11 +6614,107 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "fmaxq" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "fmaxq" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "fmaxq" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), @@ -5787,10 +6726,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }; try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg); }, - .min => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else fallback: { + .min => |air_tag| if (use_old) try cg.airBinOp(inst, air_tag) else { const bin_op = air_datas[@intFromEnum(inst)].bin_op; - const ty = cg.typeOf(bin_op.lhs); - if (ty.isVector(zcu) and cg.floatBits(ty.childType(zcu)) != null) break :fallback try cg.airBinOp(inst, air_tag); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{ @@ -7494,7 +8431,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } }, + .{ .type = .vector_4_u32, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } }, .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, .unused, .unused, @@ -7631,7 +8568,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } }, + .{ .type = .vector_4_u32, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .xword } } }, .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, .{ .type = .vector_4_u32, .kind = .{ .rc = .sse } }, @@ -7955,7 +8892,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .unused, .unused, @@ -7986,7 +8923,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_2_u64, .kind = .{ .reg = .xmm0 } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .unused, @@ -8018,7 +8955,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, .unused, .unused, @@ -8047,7 +8984,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_4_u64, .kind = .{ .rc = .sse } }, @@ -8083,7 +9020,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, @@ -8119,7 +9056,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, + .{ .type = .u64, .kind = .{ .smin_mem = .{ .ref = .src0, .vectorize_to = .none } } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, .{ .type = .vector_2_u64, .kind = .{ .rc = .sse } }, @@ -8371,8 +9308,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_sse, .to_sse } }, }, .extra_temps = .{ - .{ .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, - .{ .kind = .{ .rc = .sse } }, + .{ .type = .f16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .f16, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -8383,12 +9320,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, .each = .{ .once = &.{ - .{ ._, .v_ps, .cvtph2, .dst0x, .src0x, ._, ._ }, - .{ ._, .v_ps, .cvtph2, .tmp0x, .src1x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ }, .{ ._, .v_ss, .cmp, .tmp1x, .dst0x, .dst0x, .vp(.unord) }, .{ ._, .v_ss, .min, .dst0x, .tmp0x, .dst0x, ._ }, .{ ._, .v_ps, .blendv, .dst0x, .dst0x, .tmp0x, .tmp1x }, - .{ ._, .v_, .cvtps2ph, .dst0x, .dst0x, .rm(.{}), ._ }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, } }, }, .{ .required_features = .{ .sse, null, null, null }, @@ -8411,11 +9348,248 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .qword, .is = .word } }, + .{ .scalar_float = .{ .of = .qword, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .mem, .mem } }, + .{ .src = .{ .to_sse, .mem } }, + .{ .src = .{ .mem, .to_sse } }, + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .vector_4_f16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ }, + .{ ._, .v_ps, .cmp, .tmp1x, .dst0x, .dst0x, .vp(.unord) }, + .{ ._, .v_ps, .min, .dst0x, .tmp0x, .dst0x, ._ }, + .{ ._, .v_ps, .blendv, .dst0x, .dst0x, .tmp0x, .tmp1x }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .word } }, + .{ .scalar_float = .{ .of = .xword, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .mem, .mem } }, + .{ .src = .{ .to_sse, .mem } }, + .{ .src = .{ .mem, .to_sse } }, + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_f16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0y, .src1x, ._, ._ }, + .{ ._, .v_ps, .cmp, .tmp1y, .dst0y, .dst0y, .vp(.unord) }, + .{ ._, .v_ps, .min, .dst0y, .tmp0y, .dst0y, ._ }, + .{ ._, .v_ps, .blendv, .dst0y, .dst0y, .tmp0y, .tmp1y }, + .{ ._, .v_, .cvtps2ph, .dst0q, .dst0y, .rm(.{}), ._ }, + } }, + }, .{ + .required_features = .{ .f16c, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f16, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp2y, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_ps, .cmp, .tmp3y, .tmp1y, .tmp1y, .vp(.unord) }, + .{ ._, .v_ps, .min, .tmp1y, .tmp2y, .tmp1y, ._ }, + .{ ._, .v_ps, .blendv, .tmp1y, .tmp1y, .tmp2y, .tmp3y }, + .{ ._, .v_, .cvtps2ph, .memia(.dst0x, .tmp0, .add_size), .tmp1y, .rm(.{}), ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__fminh" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ }, + .{ ._, .vp_w, .insr, .tmp1x, .tmp2x, .memia(.src0w, .tmp0, .add_size), .ui(0) }, + .{ ._, .vp_w, .insr, .tmp2x, .tmp2x, .memia(.src1w, .tmp0, .add_size), .ui(0) }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .vp_w, .extr, .memia(.dst0w, .tmp0, .add_size), .tmp1x, .ui(0), ._ }, + .{ ._, ._, .add, .tmp0q, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__fminh" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, .p_w, .insr, .tmp1x, .memia(.src0w, .tmp0, .add_size), .ui(0), ._ }, + .{ ._, .p_w, .insr, .tmp2x, .memia(.src1w, .tmp0, .add_size), .ui(0), ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .p_w, .extr, .memia(.dst0w, .tmp0, .add_size), .tmp1x, .ui(0), ._ }, + .{ ._, ._, .add, .tmp0q, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__fminh" } } }, + .{ .type = .f16, .kind = .{ .reg = .ax } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, .p_w, .insr, .tmp1x, .memia(.src0w, .tmp0, .add_size), .ui(0), ._ }, + .{ ._, .p_w, .insr, .tmp2x, .memia(.src1w, .tmp0, .add_size), .ui(0), ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .p_w, .extr, .tmp4d, .tmp1x, .ui(0), ._ }, + .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp4w, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f16, .kind = .{ .reg = .eax } }, + .{ .type = .f32, .kind = .mem }, + .{ .type = .f16, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f16, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__fminh" } } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ }, + .{ ._, ._ss, .mov, .tmp3x, .mem(.tmp2d), ._, ._ }, + .{ ._, ._, .movzx, .tmp1d, .memia(.src1w, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ }, + .{ ._, ._ss, .mov, .tmp4x, .mem(.tmp2d), ._, ._ }, + .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, + .{ ._, ._ss, .mov, .mem(.tmp2d), .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .mem(.tmp2d), ._, ._ }, + .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_size), .tmp1w, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(2), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .src_constraints = .{ @@ -8426,7 +9600,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_sse, .to_sse } }, }, .extra_temps = .{ - .{ .kind = .{ .rc = .sse } }, + .{ .type = .f32, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -8487,6 +9661,210 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ps, .andn, .dst0x, .src1x, ._, ._ }, .{ ._, ._ps, .@"or", .dst0x, .tmp0x, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cmp, .tmp0x, .src0x, .src0x, .vp(.unord) }, + .{ ._, .v_ps, .min, .dst0x, .src1x, .src0x, ._ }, + .{ ._, .v_ps, .blendv, .dst0x, .dst0x, .src1x, .tmp0x }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .{ .to_reg = .xmm0 }, .mem } }, + .{ .src = .{ .mem, .{ .to_reg = .xmm0 } }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .xmm0 }, .to_sse } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._ps, .mova, .dst0x, .src1x, ._, ._ }, + .{ ._, ._ps, .min, .dst0x, .src0x, ._, ._ }, + .{ ._, ._ps, .cmp, .src0x, .src0x, .vp(.unord), ._ }, + .{ ._, ._ps, .blendv, .dst0x, .src1x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .mem } }, + .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._ps, .mova, .tmp0x, .src1x, ._, ._ }, + .{ ._, ._ps, .min, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._ps, .cmp, .dst0x, .src0x, .vp(.ord), ._ }, + .{ ._, ._ps, .@"and", .tmp0x, .dst0x, ._, ._ }, + .{ ._, ._ps, .andn, .dst0x, .src1x, ._, ._ }, + .{ ._, ._ps, .@"or", .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .yword, .is = .dword } }, + .{ .scalar_float = .{ .of = .yword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_ps, .cmp, .tmp0y, .src0y, .src0y, .vp(.unord) }, + .{ ._, .v_ps, .min, .dst0y, .src1y, .src0y, ._ }, + .{ ._, .v_ps, .blendv, .dst0y, .dst0y, .src1y, .tmp0y }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, + .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_ps, .mova, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_ps, .mova, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_ps, .cmp, .tmp3y, .tmp1y, .tmp1y, .vp(.unord) }, + .{ ._, .v_ps, .min, .tmp1y, .tmp2y, .tmp1y, ._ }, + .{ ._, .v_ps, .blendv, .tmp1y, .tmp1y, .tmp2y, .tmp3y }, + .{ ._, .v_ps, .mova, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .reg = .xmm0 } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp3x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .min, .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .cmp, .tmp1x, .tmp1x, .vp(.unord), ._ }, + .{ ._, ._ps, .blendv, .tmp3x, .tmp2x, .tmp1x, ._ }, + .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp3x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .min, .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .cmp, .tmp1x, .tmp1x, .vp(.ord), ._ }, + .{ ._, ._ps, .@"and", .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .andn, .tmp1x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .@"or", .tmp1x, .tmp3x, ._, ._ }, + .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, }, .{ .required_features = .{ .avx, null, null, null }, .src_constraints = .{ @@ -8497,7 +9875,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .src = .{ .to_sse, .to_sse } }, }, .extra_temps = .{ - .{ .kind = .{ .rc = .sse } }, + .{ .type = .f64, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -8579,11 +9957,249 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, .cmp, .tmp0x, .src0x, .src0x, .vp(.unord) }, + .{ ._, .v_pd, .min, .dst0x, .src1x, .src0x, ._ }, + .{ ._, .v_pd, .blendv, .dst0x, .dst0x, .src1x, .tmp0x }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .{ .to_reg = .xmm0 }, .mem } }, + .{ .src = .{ .mem, .{ .to_reg = .xmm0 } }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .xmm0 }, .to_sse } }, + }, + .dst_temps = .{.{ .rc = .sse }}, + .each = .{ .once = &.{ + .{ ._, ._pd, .mova, .dst0x, .src1x, ._, ._ }, + .{ ._, ._pd, .min, .dst0x, .src0x, ._, ._ }, + .{ ._, ._pd, .cmp, .src0x, .src0x, .vp(.unord), ._ }, + .{ ._, ._pd, .blendv, .dst0x, .src1x, .src0x, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mut_sse, .mem } }, + .{ .src = .{ .mem, .to_mut_sse }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .ref = .src0 }}, + .each = .{ .once = &.{ + .{ ._, ._pd, .mova, .tmp0x, .src1x, ._, ._ }, + .{ ._, ._pd, .min, .tmp0x, .src0x, ._, ._ }, + .{ ._, ._pd, .cmp, .dst0x, .src0x, .vp(.ord), ._ }, + .{ ._, ._pd, .@"and", .tmp0x, .dst0x, ._, ._ }, + .{ ._, ._pd, .andn, .dst0x, .src1x, ._, ._ }, + .{ ._, ._pd, .@"or", .dst0x, .tmp0x, ._, ._ }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .yword, .is = .qword } }, + .{ .scalar_float = .{ .of = .yword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_sse, .to_sse } }, + }, + .extra_temps = .{ + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.{ .mut_rc = .{ .ref = .src0, .rc = .sse } }}, + .each = .{ .once = &.{ + .{ ._, .v_pd, .cmp, .tmp0y, .src0y, .src0y, .vp(.unord) }, + .{ ._, .v_pd, .min, .dst0y, .src1y, .src0y, ._ }, + .{ ._, .v_pd, .blendv, .dst0y, .dst0y, .src1y, .tmp0y }, + } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, + .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_pd, .mova, .tmp1y, .memia(.src0y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_pd, .mova, .tmp2y, .memia(.src1y, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_pd, .cmp, .tmp3y, .tmp1y, .tmp1y, .vp(.unord) }, + .{ ._, .v_pd, .min, .tmp1y, .tmp2y, .tmp1y, ._ }, + .{ ._, .v_pd, .blendv, .tmp1y, .tmp1y, .tmp2y, .tmp3y }, + .{ ._, .v_pd, .mova, .memia(.dst0y, .tmp0, .add_size), .tmp1y, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(32), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_f64, .kind = .{ .reg = .xmm0 } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._pd, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .mova, .tmp3x, .tmp2x, ._, ._ }, + .{ ._, ._pd, .min, .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._pd, .cmp, .tmp1x, .tmp1x, .vp(.unord), ._ }, + .{ ._, ._pd, .blendv, .tmp3x, .tmp2x, .tmp1x, ._ }, + .{ ._, ._pd, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp3x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._pd, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._pd, .mova, .tmp3x, .tmp2x, ._, ._ }, + .{ ._, ._pd, .min, .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._pd, .cmp, .tmp1x, .tmp1x, .vp(.ord), ._ }, + .{ ._, ._pd, .@"and", .tmp3x, .tmp1x, ._, ._ }, + .{ ._, ._pd, .andn, .tmp1x, .tmp2x, ._, ._ }, + .{ ._, ._pd, .@"or", .tmp1x, .tmp3x, ._, ._ }, + .{ ._, ._pd, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, + .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f64, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f64, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "fmin" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .xor, .tmp1x, .tmp1x, ._, ._ }, + .{ ._, ._ps, .xor, .tmp2x, .tmp2x, ._, ._ }, + .{ ._, ._ps, .movl, .tmp1x, .memia(.src0q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .movl, .tmp2x, .memia(.src1q, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .movl, .memia(.dst0q, .tmp0, .add_size), .tmp1q, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(8), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, }, .{ .required_features = .{ .x87, .cmov, null, null }, .src_constraints = .{ @@ -8730,6 +10346,164 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_, .ld, .src1t, ._, ._, ._ }, .{ .@"1:", .f_p, .st, .dst0t, ._, ._, ._ }, } }, + }, .{ + .required_features = .{ .x87, .cmov, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ucomi, .tmp1t, .tmp1t, ._, ._ }, + .{ ._, .f_u, .cmov, .tmp1t, .tmp2t, ._, ._ }, + .{ ._, .f_, .ucomi, .tmp1t, .tmp2t, ._, ._ }, + .{ ._, .f_nb, .cmov, .tmp1t, .tmp2t, ._, ._ }, + .{ ._, .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sahf, .x87, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .u8, .kind = .{ .reg = .ah } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp1t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .sahf, ._, ._, ._, ._ }, + .{ ._, ._p, .j, .@"1f", ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .sahf, ._, ._, ._, ._ }, + .{ ._, ._b, .j, .@"2f", ._, ._, ._ }, + .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ }, + .{ .@"2:", .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .x87, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .u8, .kind = .{ .reg = .ah } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .xam, ._, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp3b, .si(0b0_1_000_100), ._, ._ }, + .{ ._, ._z, .j, .@"1f", ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp3b, .si(0b0_0_000_001), ._, ._ }, + .{ ._, ._nz, .j, .@"2f", ._, ._, ._ }, + .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ }, + .{ .@"2:", .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .x87, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .u8, .kind = .{ .reg = .ah } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .f_, .ld, .memia(.src1t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ld, .memia(.src0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp1t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .sahf, ._, ._, ._, ._ }, + .{ ._, ._p, .j, .@"1f", ._, ._, ._ }, + .{ ._, .f_, .ucom, .tmp2t, ._, ._, ._ }, + .{ ._, .fn_sw, .st, .tmp3w, ._, ._, ._ }, + .{ ._, ._, .sahf, ._, ._, ._, ._ }, + .{ ._, ._b, .j, .@"2f", ._, ._, ._ }, + .{ .@"1:", .f_p, .st, .tmp1t, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp2t, ._, ._, ._ }, + .{ .@"2:", .f_p, .st, .memia(.dst0t, .tmp0, .add_size), ._, ._, ._ }, + .{ ._, .f_p, .st, .tmp2t, ._, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, }, .{ .required_features = .{ .sse, null, null, null }, .src_constraints = .{ @@ -8751,11 +10525,107 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .dst_temps = .{.{ .ref = .src0 }}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, + }, .{ + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "fminq" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "fminq" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .f128, .kind = .{ .reg = .xmm0 } }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "fminq" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{.mem}, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .sa(.src0, .sub_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, + .{ ._, ._, .add, .tmp0q, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), @@ -8955,7 +10825,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .sse } }, + .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -8982,7 +10852,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .sse } }, + .{ .type = .vector_32_u8, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -9009,7 +10879,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .sse } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -9036,7 +10906,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .sse } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -9063,7 +10933,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .extra_temps = .{ .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .kind = .{ .rc = .sse } }, + .{ .type = .vector_16_u8, .kind = .{ .rc = .sse } }, .unused, .unused, .unused, @@ -15226,8 +17096,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .info = .{ .kind = .all, .scalar = .dword }, } }}, .each = .{ .once = &.{ - .{ ._, .v_ps, .cvtph2, .dst0x, .src0x, ._, ._ }, - .{ ._, .v_ps, .cvtph2, .tmp0x, .src1x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ }, .{ ._, .v_ss, .cmp, .dst0x, .dst0x, .tmp0x, .vp(switch (cc) { else => unreachable, .e => .eq, @@ -15815,7 +17685,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .i32, .kind = .{ .reg = .eax } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, - .{ .type = .vector_8_f16, .kind = .mem }, + .{ .type = .f32, .kind = .mem }, .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, @@ -15825,10 +17695,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, .{ .@"0:", ._, .movzx, .tmp4d, .memsi(.src0w, .@"2", .tmp0), ._, ._ }, .{ ._, ._, .mov, .mem(.tmp7d), .tmp4d, ._, ._ }, - .{ ._, ._ps, .mova, .tmp1x, .mem(.tmp7x), ._, ._ }, + .{ ._, ._ss, .mov, .tmp1x, .mem(.tmp7d), ._, ._ }, .{ ._, ._, .movzx, .tmp4d, .memsi(.src1w, .@"2", .tmp0), ._, ._ }, .{ ._, ._, .mov, .mem(.tmp7d), .tmp4d, ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .mem(.tmp7x), ._, ._ }, + .{ ._, ._ss, .mov, .tmp2x, .mem(.tmp7d), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ }, .{ ._, ._, .@"test", .tmp4d, .tmp4d, ._, ._ }, @@ -15863,7 +17733,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .i32, .kind = .{ .reg = .eax } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, - .{ .type = .vector_8_f16, .kind = .mem }, + .{ .type = .f32, .kind = .mem }, .unused, }, .dst_temps = .{.{ .rc = .general_purpose }}, @@ -15873,10 +17743,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, .{ .@"0:", ._, .movzx, .tmp4d, .memsi(.src0w, .@"2", .tmp0), ._, ._ }, .{ ._, ._, .mov, .mem(.tmp7d), .tmp4d, ._, ._ }, - .{ ._, ._ps, .mova, .tmp1x, .mem(.tmp7x), ._, ._ }, + .{ ._, ._ss, .mov, .tmp1x, .mem(.tmp7d), ._, ._ }, .{ ._, ._, .movzx, .tmp4d, .memsi(.src1w, .@"2", .tmp0), ._, ._ }, .{ ._, ._, .mov, .mem(.tmp7d), .tmp4d, ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .mem(.tmp7x), ._, ._ }, + .{ ._, ._ss, .mov, .tmp2x, .mem(.tmp7d), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ }, .{ ._, ._, .@"test", .tmp4d, .tmp4d, ._, ._ }, @@ -16133,7 +18003,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .i32, .kind = .{ .reg = .eax } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, - .{ .type = .vector_8_f16, .kind = .mem }, + .{ .type = .f32, .kind = .mem }, }, .dst_temps = .{.mem}, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -16142,10 +18012,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._, .movzx, .tmp5d, .memsi(.src0w, .@"2", .tmp0), ._, ._ }, .{ ._, ._, .mov, .mem(.tmp8d), .tmp5d, ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .mem(.tmp8x), ._, ._ }, + .{ ._, ._ss, .mov, .tmp2x, .mem(.tmp8d), ._, ._ }, .{ ._, ._, .movzx, .tmp5d, .memsi(.src1w, .@"2", .tmp0), ._, ._ }, .{ ._, ._, .mov, .mem(.tmp8d), .tmp5d, ._, ._ }, - .{ ._, ._ps, .mova, .tmp3x, .mem(.tmp8x), ._, ._ }, + .{ ._, ._ss, .mov, .tmp3x, .mem(.tmp8d), ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, @@ -16191,7 +18061,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .i32, .kind = .{ .reg = .eax } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, - .{ .type = .vector_8_f16, .kind = .mem }, + .{ .type = .f32, .kind = .mem }, }, .dst_temps = .{.mem}, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -16200,10 +18070,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .xor, .tmp1d, .tmp1d, ._, ._ }, .{ .@"0:", ._, .movzx, .tmp5d, .memsi(.src0w, .@"2", .tmp0), ._, ._ }, .{ ._, ._, .mov, .mem(.tmp8d), .tmp5d, ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .mem(.tmp8x), ._, ._ }, + .{ ._, ._ss, .mov, .tmp2x, .mem(.tmp8d), ._, ._ }, .{ ._, ._, .movzx, .tmp5d, .memsi(.src1w, .@"2", .tmp0), ._, ._ }, .{ ._, ._, .mov, .mem(.tmp8d), .tmp5d, ._, ._ }, - .{ ._, ._ps, .mova, .tmp3x, .mem(.tmp8x), ._, ._ }, + .{ ._, ._ss, .mov, .tmp3x, .mem(.tmp8d), ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, .{ ._, ._, .xor, .tmp7d, .tmp7d, ._, ._ }, .{ ._, ._, .@"test", .tmp5d, .tmp5d, ._, ._ }, @@ -16482,7 +18352,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .@"or", .tmp4b, .tmp5b, ._, ._ }, .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp4b, ._, ._ }, .{ ._, ._, .lea, .tmp1p, .lead(.none, .tmp1, 1), ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(64), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -16528,7 +18398,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .@"or", .tmp4b, .tmp5b, ._, ._ }, .{ ._, ._, .mov, .lea(.byte, .tmp1), .tmp4b, ._, ._ }, .{ ._, ._c, .in, .tmp1q, ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(64), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ @@ -19809,8 +21679,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }}, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, .v_ps, .cvtph2, .tmp0x, .src0x, ._, ._ }, - .{ ._, .v_ps, .cvtph2, .tmp1x, .src1x, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp0x, .src0q, ._, ._ }, + .{ ._, .v_ps, .cvtph2, .tmp1x, .src1q, ._, ._ }, .{ ._, .v_ss, .ucomi, .tmp0x, .tmp1x, ._, ._ }, } }, }, .{ @@ -21194,19 +23064,21 @@ fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok }; if (reg_ok) need_mem: { - if (std.math.isPowerOfTwo(abi_size) and abi_size <= @as(u32, switch (ty.zigTypeTag(zcu)) { + if (std.math.isPowerOfTwo(abi_size) and abi_size <= @as(u32, max_abi_size: switch (ty.zigTypeTag(zcu)) { .float => switch (ty.floatBits(self.target.*)) { 16, 32, 64, 128 => 16, 80 => break :need_mem, else => unreachable, }, - .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (ty.childType(zcu).floatBits(self.target.*)) { + .vector => { + const elem_ty = ty.childType(zcu); + break :max_abi_size if (elem_ty.toIntern() == .bool_type) + 8 + else if (self.floatBits(elem_ty)) |float_bits| switch (float_bits) { 16, 32, 64, 128 => self.vectorSize(.float), 80 => break :need_mem, else => unreachable, - }, - else => self.vectorSize(.int), + } else self.vectorSize(.int); }, else => 8, })) { @@ -21223,17 +23095,18 @@ fn allocRegOrMemAdvanced(self: *CodeGen, ty: Type, inst: ?Air.Inst.Index, reg_ok fn regClassForType(self: *CodeGen, ty: Type) Register.Class { const pt = self.pt; const zcu = pt.zcu; - return switch (ty.zigTypeTag(zcu)) { - .float => switch (ty.floatBits(self.target.*)) { - 80 => .x87, - else => .sse, - }, - .vector => switch (ty.childType(zcu).toIntern()) { - .bool_type => .general_purpose, - else => .sse, - }, - else => .general_purpose, + if (self.floatBits(ty)) |float_bits| return switch (float_bits) { + 80 => .x87, + else => .sse, }; + if (!ty.isVector(zcu)) return .general_purpose; + const elem_ty = ty.childType(zcu); + return if (elem_ty.toIntern() == .bool_type) + .general_purpose + else if (self.floatBits(elem_ty) == 80) + .x87 + else + .sse; } fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet { @@ -33370,10 +35243,21 @@ const MoveStrategy = union(enum) { else => dst_reg, .lea => if (dst_reg.bitSize() >= 32) dst_reg else dst_reg.to32(), }, src_mem), - .x87_load_store => { + .x87_load_store => if (dst_reg != .st0 and self.register_manager.isKnownRegFree(.st7)) { try self.asmMemory(.{ .f_, .ld }, src_mem); - assert(dst_reg != .st7); - try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1)); + switch (dst_reg) { + .st1, .st2, .st3, .st4, .st5, .st6 => try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1)), + .st7 => try self.asmOpOnly(.{ .f_cstp, .in }), + else => unreachable, + } + } else { + try self.asmRegister(.{ .f_p, .st }, dst_reg); + try self.asmMemory(.{ .f_, .ld }, src_mem); + switch (dst_reg) { + .st0 => {}, + .st1, .st2, .st3, .st4, .st5, .st6, .st7 => try self.asmRegister(.{ .f_, .xch }, dst_reg), + else => unreachable, + } }, .insert_extract => |ie| if (ie.insert[0] != .p_w or self.hasFeature(.sse2)) try self.asmRegisterMemoryImmediate(ie.insert, dst_reg, src_mem, .u(0)) @@ -33405,9 +35289,22 @@ const MoveStrategy = union(enum) { pub fn write(strat: MoveStrategy, self: *CodeGen, dst_mem: Memory, src_reg: Register) !void { switch (strat) { .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_reg), - .x87_load_store => { + .x87_load_store => if (self.register_manager.isKnownRegFree(.st7)) { try self.asmRegister(.{ .f_, .ld }, src_reg); try self.asmMemory(.{ .f_p, .st }, dst_mem); + } else { + switch (src_reg) { + .st0 => {}, + .st1, .st2, .st3, .st4, .st5, .st6, .st7 => try self.asmRegister(.{ .f_, .xch }, src_reg), + else => unreachable, + } + try self.asmMemory(.{ .f_p, .st }, dst_mem); + try self.asmMemory(.{ .f_, .ld }, dst_mem); + switch (src_reg) { + .st0 => {}, + .st1, .st2, .st3, .st4, .st5, .st6, .st7 => try self.asmRegister(.{ .f_, .xch }, src_reg), + else => unreachable, + } }, .insert_extract, .vex_insert_extract => |ie| if (ie.extract[0] != .p_w or self.hasFeature(.sse4_1)) try self.asmMemoryRegisterImmediate(ie.extract, dst_mem, src_reg, .u(0)) @@ -33964,10 +35861,25 @@ fn genSetReg( .general_purpose, .segment => unreachable, .x87 => switch (src_reg) { .st0 => try self.asmRegister(.{ .f_, .st }, dst_reg), - .st1, .st2, .st3, .st4, .st5, .st6 => { - try self.asmRegister(.{ .f_, .ld }, src_reg); - assert(dst_reg != .st7); - try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1)); + .st1, .st2, .st3, .st4, .st5, .st6 => switch (dst_reg) { + .st0 => { + try self.asmRegister(.{ .f_p, .st }, .st0); + try self.asmRegister(.{ .f_, .ld }, @enumFromInt(@intFromEnum(src_reg) - 1)); + }, + .st2, .st3, .st4, .st5, .st6 => if (self.register_manager.isKnownRegFree(.st7)) { + try self.asmRegister(.{ .f_, .ld }, src_reg); + try self.asmRegister(.{ .f_p, .st }, @enumFromInt(@intFromEnum(dst_reg) + 1)); + } else { + try self.asmRegister(.{ .f_, .xch }, src_reg); + try self.asmRegister(.{ .f_, .xch }, dst_reg); + try self.asmRegister(.{ .f_, .xch }, src_reg); + }, + .st7 => { + if (!self.register_manager.isKnownRegFree(.st7)) try self.asmRegister(.{ .f_, .free }, dst_reg); + try self.asmRegister(.{ .f_, .ld }, src_reg); + try self.asmOpOnly(.{ .f_cstp, .in }); + }, + else => unreachable, }, else => unreachable, }, @@ -33993,10 +35905,14 @@ fn genSetReg( .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .fromSize(abi_size) } }, }, registerAlias(src_reg, abi_size)); + switch (frame_size) { + else => {}, + 8 => try self.asmRegisterRegister(.{ ._ps, .xor }, dst_reg.to128(), dst_reg.to128()), + } try self.asmRegisterMemory(switch (frame_size) { 4 => .{ ._ss, .mov }, 8 => .{ ._ps, .movl }, - 16 => .{ ._ps, .mov }, + 16 => .{ ._ps, .mova }, else => unreachable, }, dst_reg.to128(), .{ .base = .{ .frame = frame_index }, @@ -34009,7 +35925,26 @@ fn genSetReg( .{ .register = try self.copyToTmpRegister(ty, src_mcv) }, opts, ), - .x87, .mmx, .ip, .cr, .dr => unreachable, + .x87 => { + const frame_index = try self.allocFrameIndex(.init(.{ + .size = 16, + .alignment = .@"16", + })); + try MoveStrategy.write(.x87_load_store, self, .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .tbyte } }, + }, src_reg); + try self.asmRegisterMemory(if (self.hasFeature(.avx)) + .{ .v_dqa, .mov } + else if (self.hasFeature(.sse2)) + .{ ._dqa, .mov } + else + .{ ._ps, .mova }, dst_reg.to128(), .{ + .base = .{ .frame = frame_index }, + .mod = .{ .rm = .{ .size = .xword } }, + }); + }, + .mmx, .ip, .cr, .dr => unreachable, .sse => try self.asmRegisterRegister( @as(?Mir.Inst.FixedTag, switch (ty.scalarType(zcu).zigTypeTag(zcu)) { else => switch (abi_size) { @@ -38510,7 +40445,7 @@ fn resolveCallingConventionValues( else if (ret_gpr.len >= 2 and ret_ty.isSliceAtRuntime(zcu)) break :return_value .init(.{ .register_pair = ret_gpr[0..2].* }), .segment, .mmx, .ip, .cr, .dr => unreachable, - .x87 => break :return_value .init(.{ .register = .st0 }), + .x87 => if (ret_size <= 16) break :return_value .init(.{ .register = .st0 }), .sse => if (ret_size <= self.vectorSize(.float)) break :return_value .init(.{ .register = registerAlias(abi.getCAbiSseReturnRegs(cc)[0], @max(ret_size, 16)), }), @@ -38545,7 +40480,7 @@ fn resolveCallingConventionValues( continue; }, .segment, .mmx, .ip, .cr, .dr => unreachable, - .x87 => if (param_x87.len >= 1) { + .x87 => if (param_x87.len >= 1 and param_size <= 16) { arg.* = .{ .register = param_x87[0] }; param_x87 = param_x87[1..]; continue; @@ -38656,10 +40591,12 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { fn memSize(self: *CodeGen, ty: Type) Memory.Size { const zcu = self.pt.zcu; - return switch (ty.zigTypeTag(zcu)) { - .float => .fromBitSize(ty.floatBits(self.target.*)), - else => .fromSize(@intCast(ty.abiSize(zcu))), - }; + return if (self.floatBits(ty)) |float_bits| + .fromBitSize(float_bits) + else if (ty.isVector(zcu) and ty.vectorLen(zcu) == 1 and self.floatBits(ty.childType(zcu)) == 80) + .tbyte + else + .fromSize(@intCast(ty.abiSize(zcu))); } fn splitType(self: *CodeGen, comptime parts_len: usize, ty: Type) ![parts_len]Type { @@ -40762,7 +42699,7 @@ const Select = struct { Select.Operand, Select.Operand, }; - const Label = enum { @"0:", @"1:", @"_" }; + const Label = enum { @"0:", @"1:", @"2:", @"_" }; const Operand = struct { tag: Tag, base: Ref.Sized = .none, @@ -40992,6 +42929,8 @@ const Select = struct { const @"0f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp0, .size = .none } }; const @"1b": Select.Operand = .{ .tag = .backward_label, .base = .{ .ref = .tmp1, .size = .none } }; const @"1f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp1, .size = .none } }; + const @"2b": Select.Operand = .{ .tag = .backward_label, .base = .{ .ref = .tmp2, .size = .none } }; + const @"2f": Select.Operand = .{ .tag = .forward_label, .base = .{ .ref = .tmp2, .size = .none } }; const tmp0b: Select.Operand = .{ .tag = .ref, .base = .tmp0b }; const tmp0w: Select.Operand = .{ .tag = .ref, .base = .tmp0w }; diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index 26d0930b48..87d6179123 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -3781,6 +3781,396 @@ fn binary(comptime op: anytype, comptime opts: struct { strict: bool = false }) 0xf1e3bbe031d59351770a7a501b6e969b2c00d144f17648db3f944b69dfeb7be72e5ff933a061eba4eaa422f8ca09e5a97d0b0dd740fd4076eba8c72d7a278523f399202dc2d043c4e0eb58a2bcd4066e2146e321810b1ee4d3afdddb4f026bcc7905ce17e033a7727b4e08f33b53c63d8c9f763fc6c31d0523eb38c30d5e40bc, }); } + fn testFloatVectorTypes() !void { + @setEvalBranchQuota(21_700); + + try testArgs(@Vector(1, f16), .{ + -tmin(f16), + }, .{ + fmax(f16), + }); + try testArgs(@Vector(2, f16), .{ + 0.1, 1.0, + }, .{ + -nan(f16), -fmin(f16), + }); + try testArgs(@Vector(4, f16), .{ + 0.1, -fmax(f16), 0.0, 0.1, + }, .{ + -fmin(f16), -10.0, 1.0, -tmin(f16), + }); + try testArgs(@Vector(8, f16), .{ + -fmax(f16), -fmin(f16), -nan(f16), -0.0, tmin(f16), -0.0, 0.0, 0.1, + }, .{ + -1.0, tmin(f16), nan(f16), nan(f16), -fmax(f16), -10.0, -nan(f16), 10.0, + }); + try testArgs(@Vector(16, f16), .{ + 0.1, fmax(f16), -10.0, fmax(f16), -10.0, 0.1, -tmin(f16), -inf(f16), -tmin(f16), -1.0, -fmin(f16), tmin(f16), 10.0, -fmax(f16), 0.0, -fmin(f16), + }, .{ + inf(f16), -10.0, -fmax(f16), fmax(f16), -tmin(f16), 0.0, -1.0, -1.0, 0.1, -nan(f16), -tmin(f16), 1.0, 0.1, fmax(f16), -0.0, inf(f16), + }); + try testArgs(@Vector(32, f16), .{ + -inf(f16), tmin(f16), fmin(f16), -nan(f16), nan(f16), 0.1, 0.0, 10.0, -tmin(f16), inf(f16), 1.0, -10.0, fmin(f16), -0.0, 1.0, -fmax(f16), + 10.0, -0.0, -10.0, -tmin(f16), fmax(f16), nan(f16), -fmin(f16), -1.0, 0.0, -10.0, -nan(f16), 1.0, -tmin(f16), -0.0, nan(f16), 10.0, + }, .{ + 0.0, 10.0, -nan(f16), -0.0, tmin(f16), fmax(f16), nan(f16), tmin(f16), -10.0, 0.1, 10.0, fmin(f16), -fmax(f16), inf(f16), inf(f16), -tmin(f16), + inf(f16), -0.0, 0.1, 0.0, -fmin(f16), -0.0, -nan(f16), -inf(f16), -fmin(f16), fmax(f16), 1.0, fmin(f16), -0.0, -tmin(f16), -fmax(f16), -10.0, + }); + try testArgs(@Vector(64, f16), .{ + -nan(f16), fmin(f16), -inf(f16), inf(f16), -tmin(f16), inf(f16), 0.1, -1.0, -inf(f16), nan(f16), -fmin(f16), 0.1, -tmin(f16), -fmax(f16), -10.0, inf(f16), + 0.0, -fmin(f16), -fmax(f16), 10.0, -fmax(f16), fmax(f16), 10.0, fmin(f16), -inf(f16), -nan(f16), -tmin(f16), nan(f16), -0.0, 0.0, 0.1, -fmin(f16), + 0.0, nan(f16), inf(f16), fmax(f16), nan(f16), tmin(f16), 1.0, tmin(f16), fmin(f16), -10.0, 0.0, 0.1, inf(f16), -10.0, inf(f16), 1.0, + 0.1, -inf(f16), 10.0, -0.0, -1.0, -tmin(f16), -nan(f16), 0.1, 0.1, -nan(f16), -0.0, -10.0, -0.0, -nan(f16), 0.1, fmin(f16), + }, .{ + 10.0, 0.0, fmax(f16), -inf(f16), -fmax(f16), -fmax(f16), tmin(f16), -1.0, -tmin(f16), -10.0, nan(f16), -nan(f16), tmin(f16), -fmin(f16), nan(f16), -10.0, + 10.0, fmax(f16), 0.1, 0.0, 0.1, -fmax(f16), -0.0, -fmin(f16), inf(f16), -1.0, inf(f16), fmin(f16), -inf(f16), -tmin(f16), 10.0, 10.0, + 0.1, 0.1, 0.1, 10.0, -fmin(f16), inf(f16), 0.1, fmax(f16), inf(f16), -0.0, -10.0, tmin(f16), -fmin(f16), 0.0, 10.0, 0.0, + -tmin(f16), -inf(f16), 1.0, -fmax(f16), inf(f16), 10.0, fmax(f16), -1.0, 0.0, 0.1, -1.0, -inf(f16), 0.1, 0.0, -10.0, fmax(f16), + }); + try testArgs(@Vector(128, f16), .{ + -fmin(f16), 1.0, 0.0, 0.1, nan(f16), 0.1, 0.1, -inf(f16), -tmin(f16), 1.0, -fmin(f16), -fmax(f16), -1.0, -fmin(f16), 10.0, -nan(f16), + inf(f16), -inf(f16), tmin(f16), -10.0, -1.0, -0.0, -0.0, 1.0, nan(f16), -10.0, fmin(f16), -tmin(f16), tmin(f16), 0.1, -fmax(f16), fmax(f16), + tmin(f16), -fmin(f16), nan(f16), 10.0, 1.0, -fmin(f16), 0.1, 10.0, fmax(f16), fmax(f16), fmax(f16), -1.0, -nan(f16), 10.0, tmin(f16), -nan(f16), + -nan(f16), -inf(f16), -0.0, -inf(f16), nan(f16), -1.0, 0.1, -fmax(f16), -10.0, nan(f16), 1.0, -10.0, tmin(f16), 1.0, 0.1, 1.0, + 10.0, 0.1, tmin(f16), nan(f16), -inf(f16), -1.0, -1.0, -fmax(f16), -inf(f16), 0.1, 0.1, -0.0, 10.0, fmin(f16), -1.0, inf(f16), + 0.1, -10.0, inf(f16), -0.0, 0.1, 0.0, inf(f16), 1.0, tmin(f16), -tmin(f16), 0.1, inf(f16), tmin(f16), -inf(f16), 10.0, 1.0, + -inf(f16), 0.1, 1.0, fmax(f16), -fmin(f16), nan(f16), -nan(f16), fmin(f16), -1.0, -fmax(f16), inf(f16), -fmax(f16), 0.0, -10.0, fmin(f16), -fmax(f16), + -0.0, -1.0, 0.1, 10.0, inf(f16), fmax(f16), inf(f16), 10.0, fmax(f16), -0.0, -tmin(f16), fmin(f16), inf(f16), nan(f16), -fmin(f16), -1.0, + }, .{ + -fmax(f16), fmax(f16), inf(f16), 1.0, nan(f16), 0.1, -fmax(f16), 10.0, -fmin(f16), 0.1, fmin(f16), -0.0, 0.1, -0.0, -nan(f16), -nan(f16), + inf(f16), 1.0, -1.0, 0.1, 0.1, 0.1, 0.0, -tmin(f16), -1.0, -10.0, -tmin(f16), 1.0, -10.0, fmin(f16), -fmax(f16), -nan(f16), + -tmin(f16), -inf(f16), inf(f16), -fmin(f16), -nan(f16), 0.0, -inf(f16), -fmax(f16), 0.1, -inf(f16), tmin(f16), nan(f16), tmin(f16), fmin(f16), -0.0, 0.1, + fmin(f16), fmin(f16), 1.0, tmin(f16), 0.0, 10.0, 0.1, inf(f16), 10.0, -tmin(f16), tmin(f16), -1.0, -fmin(f16), 1.0, nan(f16), -fmax(f16), + nan(f16), -fmin(f16), 0.1, 10.0, -10.0, 1.0, -0.0, tmin(f16), nan(f16), inf(f16), -fmax(f16), tmin(f16), -tmin(f16), 10.0, fmin(f16), -tmin(f16), + -0.0, 1.0, tmin(f16), fmax(f16), 1.0, -inf(f16), -nan(f16), -0.0, 0.1, -inf(f16), 0.1, fmax(f16), -inf(f16), -nan(f16), -1.0, -inf(f16), + 0.1, fmin(f16), -10.0, -tmin(f16), 1.0, -nan(f16), -fmax(f16), -10.0, -tmin(f16), 10.0, nan(f16), fmin(f16), fmax(f16), tmin(f16), -inf(f16), 1.0, + -fmin(f16), tmin(f16), -1.0, 0.1, 0.0, nan(f16), 1.0, fmax(f16), -1.0, 10.0, nan(f16), 1.0, fmin(f16), 1.0, -10.0, -10.0, + }); + try testArgs(@Vector(69, f16), .{ + -nan(f16), -1.0, -fmin(f16), fmin(f16), inf(f16), 0.1, 0.0, fmax(f16), tmin(f16), 0.1, 0.0, -tmin(f16), 0.0, 0.0, 1.0, -inf(f16), + tmin(f16), -inf(f16), -tmin(f16), fmin(f16), -inf(f16), -nan(f16), tmin(f16), -tmin(f16), 0.1, -1.0, -tmin(f16), fmax(f16), nan(f16), -fmin(f16), fmin(f16), 10.0, + fmin(f16), -10.0, 0.0, fmin(f16), fmax(f16), -nan(f16), fmax(f16), -fmax(f16), nan(f16), -nan(f16), fmin(f16), -10.0, -fmin(f16), fmin(f16), -fmin(f16), -nan(f16), + 0.0, -1.0, fmax(f16), 0.1, inf(f16), 1.0, -1.0, -0.0, 10.0, 0.1, -fmax(f16), tmin(f16), -inf(f16), tmin(f16), -fmax(f16), 0.1, + -10.0, -0.0, -fmax(f16), nan(f16), fmax(f16), + }, .{ + inf(f16), -fmin(f16), 0.1, 0.1, -0.0, fmax(f16), 0.1, -0.0, 0.0, -0.0, 0.0, -tmin(f16), tmin(f16), -1.0, nan(f16), -fmin(f16), + fmin(f16), 0.1, 0.1, nan(f16), -fmax(f16), -inf(f16), -nan(f16), -nan(f16), 0.1, -fmax(f16), fmin(f16), 0.1, 0.1, 0.1, -0.0, 10.0, + tmin(f16), -nan(f16), fmin(f16), -1.0, 1.0, -tmin(f16), 0.0, nan(f16), fmax(f16), -10.0, fmin(f16), -fmin(f16), -1.0, 0.1, -fmin(f16), -fmin(f16), + -fmax(f16), 0.0, fmin(f16), -10.0, -1.0, -1.0, fmax(f16), -nan(f16), -inf(f16), -inf(f16), 0.0, tmin(f16), -0.0, nan(f16), -inf(f16), nan(f16), + inf(f16), fmin(f16), -nan(f16), -inf(f16), inf(f16), + }); + + try testArgs(@Vector(1, f32), .{ + fmin(f32), + }, .{ + -tmin(f32), + }); + try testArgs(@Vector(2, f32), .{ + nan(f32), -10.0, + }, .{ + -tmin(f32), fmin(f32), + }); + try testArgs(@Vector(4, f32), .{ + fmax(f32), -fmax(f32), -10.0, 0.0, + }, .{ + inf(f32), inf(f32), -10.0, inf(f32), + }); + try testArgs(@Vector(8, f32), .{ + -10.0, fmax(f32), inf(f32), -0.0, -tmin(f32), -tmin(f32), 10.0, 0.1, + }, .{ + 10.0, -1.0, -1.0, inf(f32), 1.0, -tmin(f32), nan(f32), 10.0, + }); + try testArgs(@Vector(16, f32), .{ + 0.1, 0.1, -nan(f32), -10.0, -nan(f32), 0.0, fmin(f32), fmin(f32), -10.0, 1.0, -fmax(f32), -0.0, inf(f32), -0.0, fmax(f32), -fmin(f32), + }, .{ + nan(f32), 0.0, tmin(f32), -1.0, -10.0, -tmin(f32), fmin(f32), -fmax(f32), 0.1, 0.1, -inf(f32), tmin(f32), -0.0, 10.0, -0.0, -inf(f32), + }); + try testArgs(@Vector(32, f32), .{ + 0.1, tmin(f32), -1.0, 1.0, tmin(f32), -10.0, fmax(f32), 0.0, tmin(f32), 0.1, -1.0, fmax(f32), -nan(f32), -0.0, fmin(f32), 0.0, + -fmax(f32), fmax(f32), -fmin(f32), -inf(f32), tmin(f32), -nan(f32), -1.0, tmin(f32), -fmin(f32), -inf(f32), nan(f32), -tmin(f32), inf(f32), -inf(f32), -nan(f32), 0.1, + }, .{ + -fmin(f32), -1.0, fmax(f32), inf(f32), -fmin(f32), fmax(f32), 0.0, -10.0, 0.0, 0.1, fmin(f32), -inf(f32), 1.0, -nan(f32), -nan(f32), + -inf(f32), -0.0, nan(f32), -fmax(f32), 10.0, -tmin(f32), fmax(f32), -10.0, 0.1, tmin(f32), 0.1, -fmax(f32), 0.0, 0.1, -nan(f32), + -fmin(f32), fmax(f32), + }); + try testArgs(@Vector(64, f32), .{ + fmin(f32), 0.0, -inf(f32), 0.1, -10.0, -fmin(f32), 10.0, nan(f32), 0.1, 1.0, -1.0, 10.0, 10.0, 0.1, -fmax(f32), -1.0, + -fmin(f32), 0.1, -inf(f32), -inf(f32), 0.1, 0.1, 0.0, -1.0, nan(f32), -0.0, -0.0, -fmin(f32), -inf(f32), inf(f32), tmin(f32), -nan(f32), + 0.1, 0.0, 1.0, tmin(f32), 10.0, fmin(f32), -fmin(f32), fmax(f32), nan(f32), 1.0, -nan(f32), -nan(f32), 1.0, nan(f32), 1.0, fmax(f32), + -0.0, 0.0, inf(f32), nan(f32), tmin(f32), 0.0, fmin(f32), -0.0, -fmin(f32), tmin(f32), -1.0, -10.0, 0.1, -tmin(f32), -inf(f32), -1.0, + }, .{ + nan(f32), -nan(f32), -tmin(f32), inf(f32), -inf(f32), 0.1, 0.1, 0.1, -1.0, -inf(f32), -0.0, fmax(f32), tmin(f32), -nan(f32), -fmax(f32), -1.0, + -fmin(f32), -0.0, fmax(f32), -fmax(f32), 1.0, -0.0, 0.0, 10.0, -1.0, -fmin(f32), 0.0, fmax(f32), 0.1, 1.0, 10.0, 0.1, + 0.1, fmin(f32), -nan(f32), -inf(f32), -0.0, -inf(f32), 0.1, -fmax(f32), -10.0, -10.0, nan(f32), 10.0, -1.0, -fmin(f32), 10.0, fmin(f32), + 1.0, -fmax(f32), nan(f32), inf(f32), fmax(f32), fmax(f32), -fmin(f32), -inf(f32), -tmin(f32), -nan(f32), nan(f32), nan(f32), 0.1, 0.1, -1.0, inf(f32), + }); + try testArgs(@Vector(128, f32), .{ + -10.0, -nan(f32), inf(f32), inf(f32), -tmin(f32), -0.0, 0.0, 0.1, -0.0, fmin(f32), nan(f32), -1.0, nan(f32), -fmax(f32), nan(f32), 0.0, + 1.0, -tmin(f32), 0.0, -nan(f32), 0.1, 0.1, -1.0, 10.0, -fmax(f32), -fmin(f32), 0.1, nan(f32), 0.1, -fmax(f32), -tmin(f32), -inf(f32), + inf(f32), tmin(f32), -tmin(f32), nan(f32), -inf(f32), -10.0, 1.0, -nan(f32), 0.1, nan(f32), -1.0, tmin(f32), -fmin(f32), -0.0, -0.0, 1.0, + fmin(f32), -fmin(f32), 0.1, 0.1, 0.1, -10.0, -10.0, -tmin(f32), 1.0, -0.0, 10.0, -fmax(f32), 10.0, -fmax(f32), inf(f32), -1.0, + -fmax(f32), fmin(f32), fmin(f32), fmin(f32), -1.0, -nan(f32), fmax(f32), -nan(f32), 0.1, -1.0, -fmax(f32), -tmin(f32), -0.0, fmax(f32), -10.0, inf(f32), + 10.0, -inf(f32), 0.1, fmin(f32), nan(f32), -fmax(f32), -tmin(f32), inf(f32), tmin(f32), -fmin(f32), fmax(f32), 1.0, fmin(f32), -0.0, 0.1, fmin(f32), + 0.1, inf(f32), -10.0, inf(f32), 10.0, tmin(f32), 0.0, 1.0, inf(f32), -10.0, -fmin(f32), tmin(f32), 1.0, 0.1, 0.1, -fmin(f32), + 10.0, 0.1, fmax(f32), fmin(f32), 1.0, -10.0, -inf(f32), -10.0, 0.0, -fmax(f32), -inf(f32), -1.0, fmax(f32), -tmin(f32), inf(f32), nan(f32), + }, .{ + -tmin(f32), -fmax(f32), -fmax(f32), 10.0, inf(f32), 0.1, 1.0, fmin(f32), 0.1, 10.0, fmin(f32), -fmax(f32), 1.0, fmax(f32), 0.1, -fmin(f32), + 0.0, -0.0, -0.0, -1.0, -nan(f32), nan(f32), -tmin(f32), 10.0, -tmin(f32), -10.0, inf(f32), 0.0, tmin(f32), 0.0, -fmax(f32), inf(f32), + fmin(f32), 0.1, -10.0, tmin(f32), tmin(f32), 0.1, fmin(f32), -tmin(f32), fmin(f32), nan(f32), 0.1, -fmax(f32), -1.0, -0.0, fmin(f32), -0.0, + -1.0, -0.0, -inf(f32), fmax(f32), -10.0, 1.0, inf(f32), -1.0, -tmin(f32), -tmin(f32), 0.1, -10.0, -fmin(f32), 10.0, -10.0, -inf(f32), + -1.0, inf(f32), 0.1, 1.0, -nan(f32), 0.1, -10.0, -nan(f32), -tmin(f32), 0.0, fmin(f32), -nan(f32), fmax(f32), -tmin(f32), 0.0, 0.0, + -fmax(f32), -inf(f32), -1.0, -0.0, 10.0, nan(f32), 0.1, tmin(f32), -10.0, 10.0, tmin(f32), -fmax(f32), 0.1, -10.0, -tmin(f32), fmax(f32), + -fmax(f32), 0.1, -nan(f32), -fmin(f32), inf(f32), inf(f32), tmin(f32), tmin(f32), -tmin(f32), tmin(f32), 0.0, -0.0, 1.0, 10.0, -10.0, inf(f32), + 0.0, -fmin(f32), fmax(f32), -10.0, fmax(f32), -0.0, 0.0, -fmin(f32), 10.0, -fmin(f32), -fmin(f32), -fmin(f32), 10.0, fmin(f32), -inf(f32), fmax(f32), + }); + try testArgs(@Vector(69, f32), .{ + nan(f32), 0.1, -tmin(f32), fmax(f32), nan(f32), -fmax(f32), 0.1, fmax(f32), 10.0, inf(f32), -fmin(f32), -fmax(f32), inf(f32), -nan(f32), 0.1, 1.0, + fmax(f32), 0.1, 10.0, 0.0, -10.0, fmax(f32), 10.0, 0.0, 1.0, 10.0, -fmax(f32), 0.0, -tmin(f32), -fmin(f32), 0.1, 1.0, + fmin(f32), tmin(f32), -fmin(f32), -tmin(f32), tmin(f32), -inf(f32), -fmax(f32), -0.0, -1.0, -0.0, -fmax(f32), fmax(f32), fmin(f32), -0.0, 0.0, -inf(f32), + -tmin(f32), inf(f32), -nan(f32), tmin(f32), -1.0, -tmin(f32), 10.0, -inf(f32), -fmin(f32), 0.1, -inf(f32), -1.0, nan(f32), -inf(f32), -tmin(f32), 10.0, + 10.0, -nan(f32), -nan(f32), tmin(f32), -nan(f32), + }, .{ + -nan(f32), 1.0, fmax(f32), 0.1, -0.0, 1.0, -inf(f32), -fmin(f32), -nan(f32), inf(f32), 1.0, -nan(f32), -nan(f32), -inf(f32), tmin(f32), -fmin(f32), + -nan(f32), 0.1, fmin(f32), -1.0, -fmax(f32), 0.1, -1.0, 0.1, 0.1, -tmin(f32), 0.1, 0.1, 10.0, fmin(f32), 0.0, nan(f32), + tmin(f32), 1.0, nan(f32), -fmin(f32), tmin(f32), nan(f32), 0.1, nan(f32), 1.0, -fmax(f32), tmin(f32), 1.0, 0.0, -1.0, nan(f32), fmin(f32), + -inf(f32), fmax(f32), -0.0, nan(f32), tmin(f32), tmin(f32), -inf(f32), -10.0, -nan(f32), -fmax(f32), -0.0, 0.1, -inf(f32), 1.0, nan(f32), 1.0, + -10.0, fmin(f32), inf(f32), fmin(f32), 0.0, + }); + + try testArgs(@Vector(1, f64), .{ + -0.0, + }, .{ + 1.0, + }); + try testArgs(@Vector(2, f64), .{ + -1.0, 0.0, + }, .{ + -inf(f64), -fmax(f64), + }); + try testArgs(@Vector(4, f64), .{ + -inf(f64), inf(f64), 10.0, 0.0, + }, .{ + -tmin(f64), 1.0, nan(f64), 0.0, + }); + try testArgs(@Vector(8, f64), .{ + 0.1, -tmin(f64), -fmax(f64), 1.0, inf(f64), -10.0, -tmin(f64), -10.0, + }, .{ + tmin(f64), fmin(f64), 0.1, 10.0, -0.0, -0.0, fmax(f64), -1.0, + }); + try testArgs(@Vector(16, f64), .{ + 0.1, -nan(f64), 1.0, tmin(f64), fmax(f64), -fmax(f64), -tmin(f64), -0.0, -fmin(f64), -1.0, -fmax(f64), -nan(f64), -fmax(f64), nan(f64), -0.0, 0.1, + }, .{ + -1.0, -tmin(f64), -fmin(f64), 0.1, 0.1, -0.0, -nan(f64), -inf(f64), -inf(f64), -0.0, nan(f64), tmin(f64), 1.0, 0.1, tmin(f64), fmin(f64), + }); + try testArgs(@Vector(32, f64), .{ + -fmax(f64), fmin(f64), 0.1, 0.1, 0.0, 1.0, -0.0, -tmin(f64), tmin(f64), inf(f64), -tmin(f64), -tmin(f64), -tmin(f64), -fmax(f64), fmin(f64), 1.0, + -fmin(f64), -nan(f64), 1.0, -inf(f64), -nan(f64), -1.0, 0.0, 0.0, nan(f64), -nan(f64), -fmin(f64), fmin(f64), 0.1, nan(f64), tmin(f64), -fmax(f64), + }, .{ + -tmin(f64), -fmax(f64), -inf(f64), -nan(f64), fmin(f64), -inf(f64), 0.1, -fmax(f64), -inf(f64), fmin(f64), inf(f64), -1.0, -tmin(f64), inf(f64), 0.1, nan(f64), + fmin(f64), 10.0, -tmin(f64), -nan(f64), -inf(f64), 1.0, nan(f64), -fmin(f64), -1.0, nan(f64), -1.0, 0.0, 1.0, nan(f64), -1.0, -fmin(f64), + }); + try testArgs(@Vector(64, f64), .{ + -10.0, fmax(f64), -nan(f64), tmin(f64), 0.1, -1.0, 1.0, -0.0, -fmin(f64), 0.1, -fmin(f64), -0.0, -0.0, tmin(f64), -10.0, 0.1, + -10.0, -fmax(f64), -10.0, -fmin(f64), 0.0, -10.0, nan(f64), 1.0, inf(f64), inf(f64), -inf(f64), tmin(f64), tmin(f64), 0.1, -0.0, 0.1, + -0.0, 0.1, -10.0, 10.0, fmax(f64), -fmin(f64), 1.0, fmax(f64), 1.0, -10.0, fmin(f64), fmax(f64), -1.0, -0.0, -0.0, fmax(f64), + -inf(f64), -inf(f64), -tmin(f64), -fmax(f64), -nan(f64), tmin(f64), -1.0, 0.0, -inf(f64), fmax(f64), nan(f64), -inf(f64), fmin(f64), -nan(f64), -nan(f64), -10.0, + }, .{ + nan(f64), -1.0, 0.0, -10.0, -fmax(f64), -fmin(f64), -nan(f64), -tmin(f64), 0.1, -1.0, -nan(f64), -fmax(f64), 0.0, 0.0, 10.0, inf(f64), + fmin(f64), 0.0, -10.0, 1.0, -tmin(f64), -inf(f64), -fmax(f64), 0.0, -fmin(f64), -1.0, -fmin(f64), tmin(f64), 1.0, -10.0, fmin(f64), 0.1, + inf(f64), -0.0, tmin(f64), -fmax(f64), -tmin(f64), -fmax(f64), fmin(f64), -fmax(f64), 0.1, 1.0, 1.0, 0.0, fmin(f64), nan(f64), -10.0, tmin(f64), + inf(f64), 0.1, 1.0, -nan(f64), 1.0, -fmin(f64), fmax(f64), inf(f64), fmin(f64), -inf(f64), -0.0, 0.0, -1.0, -0.0, 0.1, 0.1, + }); + try testArgs(@Vector(128, f64), .{ + nan(f64), -fmin(f64), fmax(f64), fmin(f64), -10.0, nan(f64), tmin(f64), fmax(f64), inf(f64), -nan(f64), tmin(f64), -nan(f64), -0.0, fmin(f64), fmax(f64), + -inf(f64), inf(f64), -1.0, 0.0, 0.1, fmin(f64), 0.0, 0.1, -1.0, -inf(f64), 0.1, fmax(f64), fmin(f64), fmax(f64), -fmax(f64), + fmin(f64), inf(f64), -fmin(f64), -10.0, -0.0, 0.1, nan(f64), -fmax(f64), -fmax(f64), -1.0, 10.0, 10.0, -1.0, -inf(f64), inf(f64), + -fmin(f64), 1.0, -inf(f64), -10.0, 0.1, 1.0, 10.0, 10.0, tmin(f64), nan(f64), inf(f64), 0.0, -1.0, -10.0, 1.0, + -tmin(f64), -fmax(f64), -nan(f64), 10.0, 0.1, tmin(f64), 0.0, 10.0, 0.1, -tmin(f64), -tmin(f64), 1.0, -fmax(f64), nan(f64), -fmin(f64), + nan(f64), 10.0, -1.0, -0.0, -tmin(f64), nan(f64), 10.0, 10.0, -inf(f64), 0.1, -nan(f64), -10.0, -tmin(f64), -fmax(f64), -fmax(f64), + inf(f64), -inf(f64), tmin(f64), 1.0, -inf(f64), -10.0, inf(f64), 0.1, -nan(f64), -inf(f64), fmax(f64), 0.1, -inf(f64), 0.1, 1.0, + 0.1, 0.1, 0.1, inf(f64), -inf(f64), 1.0, 10.0, 10.0, nan(f64), 10.0, -tmin(f64), 1.0, -fmin(f64), -1.0, -fmax(f64), + -fmin(f64), -fmin(f64), -1.0, inf(f64), nan(f64), tmin(f64), 0.1, -1.0, + }, .{ + 0.0, 0.0, inf(f64), -0.0, 0.1, -nan(f64), 10.0, -nan(f64), tmin(f64), -10.0, -0.0, inf(f64), -fmin(f64), 0.1, fmax(f64), + nan(f64), -tmin(f64), tmin(f64), 1.0, 0.1, -10.0, -nan(f64), 1.0, inf(f64), -10.0, fmin(f64), 0.1, 10.0, -10.0, 10.0, + -nan(f64), -nan(f64), 0.1, 0.0, 10.0, -fmax(f64), -tmin(f64), tmin(f64), -1.0, -tmin(f64), -10.0, 0.1, -fmax(f64), 10.0, nan(f64), + fmax(f64), -1.0, -1.0, -tmin(f64), fmax(f64), -10.0, 0.1, 1.0, fmin(f64), inf(f64), 0.1, tmin(f64), 0.1, -fmax(f64), fmax(f64), + -10.0, -fmax(f64), fmax(f64), tmin(f64), -fmin(f64), inf(f64), 0.1, -0.0, fmax(f64), tmin(f64), 0.1, 1.0, -inf(f64), 1.0, 10.0, + 0.1, 0.0, -10.0, -nan(f64), 10.0, -fmin(f64), -tmin(f64), 10.0, 1.0, -tmin(f64), -1.0, -fmin(f64), -0.0, -10.0, 0.1, + inf(f64), -fmax(f64), 0.1, tmin(f64), -0.0, fmax(f64), 0.0, -nan(f64), -fmin(f64), fmax(f64), -0.0, nan(f64), -inf(f64), tmin(f64), 0.1, + inf(f64), 0.0, 10.0, -fmax(f64), tmin(f64), -0.0, fmin(f64), -nan(f64), -10.0, -inf(f64), nan(f64), inf(f64), -0.0, 10.0, fmax(f64), + tmin(f64), -10.0, -nan(f64), 10.0, -inf(f64), -fmax(f64), -inf(f64), -1.0, + }); + try testArgs(@Vector(69, f64), .{ + inf(f64), -0.0, -fmax(f64), fmax(f64), fmax(f64), 0.0, fmin(f64), -nan(f64), 0.1, 0.1, 0.1, -fmin(f64), inf(f64), 0.1, fmax(f64), nan(f64), + tmin(f64), -10.0, 10.0, -tmin(f64), -0.0, nan(f64), -10.0, fmin(f64), 0.0, -0.0, 0.1, inf(f64), -tmin(f64), -nan(f64), inf(f64), -nan(f64), + -inf(f64), fmax(f64), 0.1, -fmin(f64), 0.1, -1.0, fmin(f64), fmin(f64), fmin(f64), 10.0, -fmin(f64), nan(f64), 0.0, 0.0, 10.0, nan(f64), + -tmin(f64), tmin(f64), tmin(f64), fmin(f64), -0.0, -1.0, 0.1, 1.0, fmax(f64), tmin(f64), fmin(f64), 0.0, -fmin(f64), fmin(f64), -tmin(f64), 0.0, + -nan(f64), 10.0, -1.0, 0.1, 0.0, + }, .{ + -10.0, -0.0, fmin(f64), -fmin(f64), nan(f64), 10.0, -tmin(f64), -fmax(f64), 10.0, 0.1, -fmin(f64), inf(f64), -inf(f64), -tmin(f64), 1.0, tmin(f64), + -tmin(f64), -nan(f64), fmax(f64), 0.0, -1.0, 10.0, inf(f64), fmin(f64), fmax(f64), 0.1, 0.1, fmax(f64), -inf(f64), 0.1, 0.1, fmin(f64), + 0.1, fmin(f64), -10.0, nan(f64), 0.0, 0.0, fmax(f64), -inf(f64), tmin(f64), inf(f64), -tmin(f64), fmax(f64), -inf(f64), -10.0, -1.0, fmin(f64), + 0.1, -nan(f64), fmax(f64), -fmin(f64), fmax(f64), nan(f64), -0.0, -fmax(f64), 10.0, nan(f64), inf(f64), -1.0, -fmin(f64), nan(f64), -fmin(f64), -0.0, + -nan(f64), -fmin(f64), 0.1, nan(f64), 0.1, + }); + + try testArgs(@Vector(1, f80), .{ + -nan(f80), + }, .{ + -1.0, + }); + try testArgs(@Vector(2, f80), .{ + -fmax(f80), -inf(f80), + }, .{ + 0.1, 10.0, + }); + try testArgs(@Vector(4, f80), .{ + -0.0, -inf(f80), 0.1, 10.0, + }, .{ + -1.0, 0.0, 0.1, -10.0, + }); + try testArgs(@Vector(8, f80), .{ + 1.0, -0.0, -inf(f80), 0.1, -inf(f80), fmin(f80), 0.0, 10.0, + }, .{ + -0.0, -fmin(f80), fmin(f80), -nan(f80), nan(f80), inf(f80), fmin(f80), 10.0, + }); + try testArgs(@Vector(16, f80), .{ + 10.0, inf(f80), -fmin(f80), 0.1, -tmin(f80), -0.0, -inf(f80), -1.0, -fmax(f80), -nan(f80), -tmin(f80), 10.0, 10.0, -inf(f80), -fmax(f80), fmax(f80), + }, .{ + -inf(f80), nan(f80), -fmax(f80), fmin(f80), 1.0, 0.1, -inf(f80), nan(f80), 0.1, nan(f80), -inf(f80), nan(f80), tmin(f80), 0.1, -tmin(f80), -10.0, + }); + try testArgs(@Vector(32, f80), .{ + inf(f80), -0.0, 0.1, -0.0, 0.1, -fmin(f80), -0.0, fmax(f80), nan(f80), -tmin(f80), nan(f80), -10.0, 0.0, 1.0, 10.0, -fmin(f80), + fmin(f80), 0.1, inf(f80), -0.0, nan(f80), tmin(f80), -tmin(f80), fmin(f80), tmin(f80), -0.0, nan(f80), -fmax(f80), tmin(f80), -fmin(f80), 1.0, tmin(f80), + }, .{ + 0.0, -10.0, fmax(f80), -inf(f80), 0.1, -inf(f80), inf(f80), 10.0, -1.0, -10.0, -fmin(f80), 0.0, inf(f80), 1.0, -nan(f80), 0.0, + 0.1, nan(f80), 1.0, -fmax(f80), fmin(f80), -inf(f80), -fmax(f80), 0.1, -10.0, tmin(f80), fmax(f80), -0.0, -fmin(f80), -fmin(f80), fmin(f80), -tmin(f80), + }); + try testArgs(@Vector(64, f80), .{ + -fmax(f80), 0.1, -1.0, 1.0, inf(f80), 0.1, -10.0, 0.1, fmin(f80), -fmin(f80), -10.0, -fmax(f80), 0.0, -10.0, -1.0, -nan(f80), + 0.0, 0.1, -1.0, -tmin(f80), 1.0, tmin(f80), fmax(f80), 0.0, -10.0, -tmin(f80), fmax(f80), -0.0, 0.1, -inf(f80), -fmax(f80), -1.0, + -nan(f80), tmin(f80), -tmin(f80), -0.0, -0.0, -1.0, -0.0, fmax(f80), inf(f80), -nan(f80), 0.1, -inf(f80), -tmin(f80), nan(f80), 0.1, 10.0, + nan(f80), -inf(f80), 0.1, tmin(f80), -fmin(f80), 10.0, -10.0, tmin(f80), fmin(f80), nan(f80), 0.1, -nan(f80), tmin(f80), nan(f80), fmax(f80), -fmax(f80), + }, .{ + -nan(f80), -fmax(f80), tmin(f80), -inf(f80), -tmin(f80), fmin(f80), -nan(f80), -fmin(f80), fmax(f80), inf(f80), -0.0, -1.0, 0.1, -fmax(f80), 1.0, -inf(f80), + 0.0, -nan(f80), -10.0, -1.0, -nan(f80), inf(f80), 1.0, -nan(f80), 10.0, inf(f80), tmin(f80), 0.1, tmin(f80), -tmin(f80), -inf(f80), -fmin(f80), + fmax(f80), fmax(f80), 0.1, -tmin(f80), -nan(f80), -1.0, fmin(f80), -nan(f80), -nan(f80), inf(f80), -1.0, 0.1, -fmin(f80), -tmin(f80), 0.0, -0.0, + 0.1, -fmin(f80), -inf(f80), -1.0, -tmin(f80), 1.0, -inf(f80), -0.0, 0.0, 1.0, tmin(f80), 0.0, 0.1, -nan(f80), fmax(f80), 1.0, + }); + try testArgs(@Vector(128, f80), .{ + 0.1, -0.0, 0.1, 0.0, fmin(f80), -1.0, 1.0, -inf(f80), fmax(f80), -fmin(f80), nan(f80), 10.0, 0.1, 0.1, -fmin(f80), -inf(f80), + -1.0, -inf(f80), 1.0, -fmin(f80), inf(f80), -nan(f80), 10.0, inf(f80), tmin(f80), nan(f80), -10.0, inf(f80), 10.0, inf(f80), -10.0, 0.0, + -10.0, fmin(f80), -tmin(f80), 1.0, -fmax(f80), nan(f80), 0.0, fmax(f80), 0.1, -1.0, -fmin(f80), inf(f80), -tmin(f80), nan(f80), -tmin(f80), 10.0, + -10.0, -tmin(f80), -1.0, -tmin(f80), -fmax(f80), 10.0, -1.0, -inf(f80), -nan(f80), 0.0, 1.0, fmax(f80), -tmin(f80), -fmin(f80), fmin(f80), fmin(f80), + -10.0, -fmax(f80), -tmin(f80), inf(f80), 1.0, 0.0, tmin(f80), -nan(f80), -fmin(f80), 0.1, -nan(f80), 0.0, 0.1, -10.0, -0.0, -nan(f80), + 1.0, 10.0, -10.0, fmin(f80), -nan(f80), fmax(f80), -0.0, 1.0, inf(f80), 1.0, -fmin(f80), -fmin(f80), 0.0, 0.1, inf(f80), 10.0, + tmin(f80), -1.0, fmax(f80), -0.0, fmax(f80), fmax(f80), 0.1, -fmin(f80), -10.0, 1.0, -fmin(f80), -fmax(f80), fmin(f80), -fmax(f80), -0.0, -1.0, + -nan(f80), -inf(f80), nan(f80), -fmax(f80), inf(f80), -inf(f80), -nan(f80), fmin(f80), nan(f80), -1.0, tmin(f80), tmin(f80), 0.1, 10.0, -tmin(f80), -nan(f80), + }, .{ + -1.0, -0.0, 0.0, fmax(f80), -1.0, -0.0, 0.1, tmin(f80), -inf(f80), 10.0, -0.0, 0.1, -tmin(f80), -fmax(f80), tmin(f80), inf(f80), + 0.1, 1.0, tmin(f80), nan(f80), -fmax(f80), 10.0, fmin(f80), -1.0, -fmax(f80), nan(f80), -fmin(f80), 10.0, -1.0, tmin(f80), inf(f80), -0.0, + tmin(f80), 1.0, 0.0, -fmin(f80), 0.0, 10.0, -fmax(f80), -0.0, -inf(f80), fmin(f80), -0.0, -0.0, -0.0, -fmax(f80), 0.1, fmax(f80), + -tmin(f80), tmin(f80), -fmax(f80), 10.0, -fmax(f80), 0.1, fmax(f80), -10.0, 0.1, 1.0, -1.0, -1.0, nan(f80), -nan(f80), 10.0, -nan(f80), + nan(f80), -10.0, -tmin(f80), fmin(f80), -tmin(f80), -fmin(f80), tmin(f80), -0.0, 0.1, fmax(f80), tmin(f80), tmin(f80), nan(f80), 0.1, 10.0, 0.1, + inf(f80), inf(f80), 1.0, -inf(f80), -fmax(f80), 0.0, 1.0, -fmax(f80), fmax(f80), nan(f80), fmin(f80), 0.1, -1.0, 1.0, 0.1, -tmin(f80), + 10.0, 0.1, -fmax(f80), 0.0, nan(f80), -tmin(f80), 0.1, fmax(f80), fmax(f80), 0.1, -1.0, inf(f80), nan(f80), 10.0, fmax(f80), -nan(f80), + -10.0, -1.0, tmin(f80), fmin(f80), inf(f80), fmax(f80), -fmin(f80), fmin(f80), -inf(f80), -tmin(f80), 1.0, nan(f80), -fmin(f80), -fmin(f80), fmax(f80), 1.0, + }); + try testArgs(@Vector(69, f80), .{ + -10.0, tmin(f80), 0.1, -nan(f80), -inf(f80), -nan(f80), fmin(f80), -0.0, 10.0, fmax(f80), -fmin(f80), 0.1, -nan(f80), inf(f80), 1.0, -1.0, + inf(f80), fmin(f80), -fmax(f80), 0.1, nan(f80), 0.0, 0.0, nan(f80), -10.0, fmax(f80), fmin(f80), -fmax(f80), 1.0, 0.1, 0.0, -fmin(f80), + -tmin(f80), 0.0, -10.0, fmin(f80), 1.0, 10.0, 0.1, nan(f80), -10.0, fmax(f80), 0.1, fmin(f80), -inf(f80), 0.0, tmin(f80), inf(f80), + fmax(f80), 1.0, 0.1, nan(f80), inf(f80), tmin(f80), tmin(f80), -fmax(f80), 0.0, fmin(f80), -inf(f80), 0.1, -tmin(f80), 0.1, -1.0, 0.1, + -fmax(f80), -1.0, 0.1, -1.0, fmax(f80), + }, .{ + -1.0, fmin(f80), inf(f80), -nan(f80), -0.0, fmin(f80), -0.0, nan(f80), -fmax(f80), 0.1, 1.0, -10.0, -tmin(f80), -fmin(f80), 10.0, inf(f80), + -10.0, -tmin(f80), -fmin(f80), 10.0, 0.0, -tmin(f80), 10.0, -10.0, 0.1, 0.1, tmin(f80), fmax(f80), 0.0, 0.1, 0.1, -10.0, + fmin(f80), nan(f80), -10.0, -10.0, -10.0, 0.0, -0.0, 0.1, fmin(f80), fmin(f80), -0.0, -fmin(f80), -nan(f80), -inf(f80), 0.0, -inf(f80), + inf(f80), fmax(f80), -tmin(f80), inf(f80), 0.1, -nan(f80), 0.1, tmin(f80), -10.0, -fmax(f80), -fmax(f80), inf(f80), -nan(f80), 1.0, -inf(f80), 10.0, + nan(f80), 10.0, -10.0, 0.0, -fmin(f80), + }); + + try testArgs(@Vector(1, f128), .{ + -nan(f128), + }, .{ + -0.0, + }); + try testArgs(@Vector(2, f128), .{ + 0.0, -inf(f128), + }, .{ + 0.1, -fmin(f128), + }); + try testArgs(@Vector(4, f128), .{ + 0.1, fmax(f128), 10.0, -fmax(f128), + }, .{ + -tmin(f128), fmax(f128), -0.0, -0.0, + }); + try testArgs(@Vector(8, f128), .{ + 10.0, -fmin(f128), 0.0, -inf(f128), 10.0, -0.0, -1.0, -fmin(f128), + }, .{ + fmin(f128), tmin(f128), -1.0, -10.0, 0.0, -tmin(f128), 0.0, 0.1, + }); + try testArgs(@Vector(16, f128), .{ + -fmin(f128), -10.0, -fmin(f128), 0.1, -10.0, 1.0, -fmax(f128), tmin(f128), -nan(f128), -tmin(f128), 10.0, -inf(f128), -1.0, tmin(f128), -0.0, nan(f128), + }, .{ + -fmax(f128), fmin(f128), inf(f128), tmin(f128), -10.0, 10.0, fmax(f128), 1.0, -inf(f128), -inf(f128), -fmax(f128), -nan(f128), 1.0, -inf(f128), tmin(f128), tmin(f128), + }); + try testArgs(@Vector(32, f128), .{ + -0.0, -1.0, 1.0, -fmax(f128), -fmax(f128), 0.1, -fmin(f128), -fmin(f128), -1.0, -tmin(f128), -0.0, -fmax(f128), tmin(f128), inf(f128), 0.0, fmax(f128), + -nan(f128), -0.0, -inf(f128), -1.0, 0.1, -fmin(f128), tmin(f128), -10.0, fmax(f128), -nan(f128), -nan(f128), -fmax(f128), 0.1, inf(f128), -0.0, tmin(f128), + }, .{ + -1.0, -10.0, -fmin(f128), -fmin(f128), inf(f128), tmin(f128), nan(f128), 0.0, -fmin(f128), 0.1, -nan(f128), 0.1, -0.0, tmin(f128), 1.0, 0.0, + fmin(f128), fmax(f128), -fmax(f128), -tmin(f128), fmin(f128), -0.0, -1.0, -nan(f128), -inf(f128), 1.0, nan(f128), 1.0, 0.1, -0.0, -fmax(f128), -10.0, + }); + try testArgs(@Vector(64, f128), .{ + -1.0, -0.0, nan(f128), 0.1, -10.0, 0.0, 1.0, 1.0, -inf(f128), fmin(f128), fmax(f128), nan(f128), -nan(f128), inf(f128), -0.0, + 0.1, -inf(f128), -fmax(f128), 10.0, -tmin(f128), -tmin(f128), -fmax(f128), 1.0, 0.1, 0.1, nan(f128), 10.0, 1.0, -tmin(f128), 10.0, + -nan(f128), fmax(f128), fmax(f128), 0.0, fmax(f128), inf(f128), 1.0, -0.0, 0.1, -tmin(f128), fmin(f128), fmax(f128), tmin(f128), inf(f128), -10.0, + -1.0, -1.0, -1.0, -inf(f128), 10.0, -tmin(f128), nan(f128), nan(f128), 0.1, fmin(f128), 0.1, tmin(f128), -10.0, 0.1, 10.0, + fmax(f128), fmax(f128), 0.1, -fmax(f128), + }, .{ + -0.0, 0.1, -0.0, -fmin(f128), 10.0, 0.0, 1.0, -inf(f128), tmin(f128), -1.0, fmin(f128), -nan(f128), -10.0, 0.1, -10.0, 0.1, + 0.1, tmin(f128), nan(f128), -1.0, 0.0, -10.0, -10.0, fmax(f128), -fmax(f128), inf(f128), -nan(f128), 0.1, -nan(f128), 1.0, fmax(f128), inf(f128), + nan(f128), fmin(f128), 10.0, inf(f128), 0.0, -inf(f128), 0.1, 0.1, 0.1, -1.0, 0.1, -10.0, inf(f128), -nan(f128), 0.1, inf(f128), + inf(f128), inf(f128), -10.0, -tmin(f128), 0.1, -inf(f128), -fmin(f128), 1.0, -tmin(f128), 1.0, -tmin(f128), -inf(f128), -0.0, -nan(f128), -1.0, -fmax(f128), + }); + try testArgs(@Vector(128, f128), .{ + -inf(f128), tmin(f128), -fmax(f128), 1.0, fmin(f128), -fmax(f128), -1.0, 0.1, -fmax(f128), -fmin(f128), -10.0, nan(f128), 0.1, nan(f128), + inf(f128), -1.0, tmin(f128), -inf(f128), 0.0, fmax(f128), tmin(f128), -fmin(f128), fmin(f128), -10.0, -fmin(f128), -10.0, 1.0, -nan(f128), + -inf(f128), fmin(f128), inf(f128), -tmin(f128), 0.1, 0.0, 10.0, 1.0, -tmin(f128), -tmin(f128), tmin(f128), 1.0, fmin(f128), 0.1, + 0.1, 0.1, fmax(f128), 0.1, inf(f128), 0.0, fmin(f128), -fmin(f128), 10.0, 10.0, -10.0, tmin(f128), inf(f128), inf(f128), + -fmin(f128), 0.0, 0.1, -nan(f128), 0.1, -inf(f128), -nan(f128), -1.0, fmin(f128), -0.0, 10.0, -tmin(f128), 10.0, 1.0, + 0.1, -0.0, -tmin(f128), 0.1, -1.0, -tmin(f128), -fmin(f128), tmin(f128), 0.1, -tmin(f128), -nan(f128), -10.0, -inf(f128), 0.0, + 0.1, 0.0, -fmin(f128), 0.0, 10.0, 10.0, tmin(f128), inf(f128), -nan(f128), -inf(f128), -1.0, -fmin(f128), -10.0, -fmin(f128), + -inf(f128), -fmax(f128), tmin(f128), tmin(f128), -fmin(f128), 0.1, fmin(f128), fmin(f128), -fmin(f128), nan(f128), -1.0, -0.0, -0.0, 0.1, + fmax(f128), 0.0, -fmax(f128), nan(f128), nan(f128), nan(f128), nan(f128), -nan(f128), fmin(f128), -inf(f128), inf(f128), -fmax(f128), -10.0, fmin(f128), + 0.1, fmax(f128), + }, .{ + 0.0, 10.0, 0.1, inf(f128), -0.0, -1.0, nan(f128), -10.0, -inf(f128), 0.1, -tmin(f128), 1.0, inf(f128), 0.1, -1.0, + 10.0, 0.0, 1.0, nan(f128), tmin(f128), fmax(f128), 10.0, 0.1, 0.1, -fmin(f128), -inf(f128), -nan(f128), -fmin(f128), -0.0, -inf(f128), + -nan(f128), fmax(f128), -fmin(f128), -tmin(f128), -fmin(f128), -fmax(f128), nan(f128), fmin(f128), -fmax(f128), fmax(f128), 1.0, 10.0, -fmax(f128), nan(f128), -fmax(f128), + -inf(f128), nan(f128), -nan(f128), tmin(f128), -1.0, 0.1, 0.1, -1.0, -nan(f128), fmax(f128), 10.0, -inf(f128), 10.0, -0.0, -1.0, + -0.0, -tmin(f128), 10.0, -1.0, -fmax(f128), fmin(f128), fmax(f128), tmin(f128), 10.0, fmin(f128), -nan(f128), 1.0, -tmin(f128), -1.0, fmax(f128), + 1.0, -tmin(f128), 0.1, -nan(f128), inf(f128), 0.1, 0.1, fmax(f128), -fmin(f128), fmin(f128), -0.0, fmax(f128), -fmax(f128), -tmin(f128), tmin(f128), + nan(f128), 0.1, tmin(f128), -1.0, fmin(f128), -nan(f128), fmax(f128), 1.0, nan(f128), -nan(f128), inf(f128), -fmin(f128), fmin(f128), 0.1, 10.0, + -tmin(f128), -10.0, 0.0, 0.1, -fmin(f128), -0.0, 0.0, -10.0, fmax(f128), nan(f128), nan(f128), -fmin(f128), -fmax(f128), 10.0, 0.0, + fmin(f128), 10.0, -tmin(f128), -tmin(f128), 0.0, -10.0, 1.0, -fmin(f128), + }); + try testArgs(@Vector(69, f128), .{ + -1.0, nan(f128), 0.1, 0.1, 0.1, -1.0, -10.0, inf(f128), -0.0, inf(f128), tmin(f128), 0.0, -fmax(f128), -tmin(f128), -10.0, -fmax(f128), + -0.0, 0.0, nan(f128), inf(f128), 1.0, -1.0, 0.1, -0.0, 1.0, fmax(f128), -fmax(f128), 0.0, inf(f128), -inf(f128), -tmin(f128), -inf(f128), + 10.0, fmin(f128), 10.0, -10.0, 0.1, 1.0, -0.0, nan(f128), tmin(f128), inf(f128), inf(f128), -nan(f128), -nan(f128), 1.0, -tmin(f128), 0.0, + fmin(f128), fmax(f128), fmin(f128), -10.0, nan(f128), 0.0, -nan(f128), -0.0, -nan(f128), 0.1, -10.0, -tmin(f128), fmax(f128), 1.0, fmin(f128), fmax(f128), + nan(f128), -inf(f128), 1.0, fmin(f128), -nan(f128), + }, .{ + -inf(f128), fmax(f128), 0.0, nan(f128), -10.0, tmin(f128), nan(f128), 1.0, 10.0, -fmin(f128), fmin(f128), tmin(f128), 0.0, -fmin(f128), -0.0, fmin(f128), + inf(f128), inf(f128), fmin(f128), fmin(f128), -tmin(f128), -fmax(f128), 10.0, nan(f128), -0.0, 1.0, 10.0, -10.0, -inf(f128), fmin(f128), -fmax(f128), 0.1, + -1.0, -nan(f128), -10.0, tmin(f128), inf(f128), nan(f128), 0.0, -10.0, tmin(f128), 0.0, -fmax(f128), -tmin(f128), 0.1, 0.1, 10.0, 0.1, + fmax(f128), 0.1, 0.0, -fmin(f128), -inf(f128), -inf(f128), -nan(f128), 0.1, -fmax(f128), fmax(f128), -fmax(f128), -0.0, -tmin(f128), -1.0, nan(f128), 0.1, + -1.0, -inf(f128), tmin(f128), inf(f128), inf(f128), + }); + } }; } @@ -3848,6 +4238,7 @@ test min { try t.testIntTypes(); try t.testIntVectorTypes(); try t.testFloatTypes(); + try t.testFloatVectorTypes(); } inline fn max(comptime Type: type, lhs: Type, rhs: Type) Type { @@ -3858,4 +4249,5 @@ test max { try t.testIntTypes(); try t.testIntVectorTypes(); try t.testFloatTypes(); + try t.testFloatVectorTypes(); }