diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 228b220613..b46c839687 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6036,10 +6036,377 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .sub_safe => unreachable, .mul, .mul_optimized => |air_tag| if (use_old) try cg.airMulDivBinOp(inst, .mul) else fallback: { const bin_op = air_datas[@intFromEnum(inst)].bin_op; - if (cg.floatBits(cg.typeOf(bin_op.lhs).scalarType(zcu)) == null) break :fallback try cg.airMulDivBinOp(inst, .mul); + const ty = cg.typeOf(bin_op.lhs); + if (ty.isVector(zcu) and cg.floatBits(ty.childType(zcu)) == null) break :fallback try cg.airMulDivBinOp(inst, .mul); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{ + cg.select(&res, &.{ty}, &ops, comptime &.{ .{ + .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .src1b, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mul, .src1b, ._, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .imm16, .none } }, + .{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .imm16, .none } }, + .{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .dst0w, .src0w, .src1w, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .mem, .none } }, + .{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .dst0w, .src1w, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .imm32, .none } }, + .{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .imm32, .none } }, + .{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .dst0d, .src0d, .src1d, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .mem, .none } }, + .{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .dst0d, .src1d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .mem, .simm32, .none } }, + .{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_gpr, .simm32, .none } }, + .{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .dst0q, .src0q, .src1q, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mut_gpr, .mem, .none } }, + .{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .to_mut_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .dst0q, .src1q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .int = .xword }, .{ .int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, + .{ ._, ._, .mul, .src1q, ._, ._, ._ }, + .{ ._, ._, .mov, .dst0q, .tmp0q, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ }, + .{ ._, .i_, .mul, .tmp0q, .memd(.src1q, 8), ._, ._ }, + .{ ._, ._, .add, .tmp1q, .tmp0q, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .src1q, ._, ._ }, + .{ ._, .i_, .mul, .tmp0q, .memd(.src0q, 8), ._, ._ }, + .{ ._, ._, .add, .tmp1q, .tmp0q, ._, ._ }, + .{ ._, ._, .mov, .memd(.dst0q, 8), .tmp1q, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bmi2, .adx, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .{ .type = .isize, .kind = .{ .reg = .rcx } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ }, + .{ ._, ._, .@"or", .tmp2q, .memi(.src0q, .tmp0), ._, ._ }, + .{ ._, ._z, .j, .@"2f", ._, ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .leaad(.tmp0, .sub_src0_size, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ .@"1:", ._x, .mul, .tmp6q, .tmp5q, .leai(.tmp1q, .tmp3), ._ }, + .{ ._, ._x, .adc, .tmp5q, .tmp4q, ._, ._ }, + .{ ._, ._, .mov, .memiad(.dst0q, .tmp3, .add_size, -8), .tmp5q, ._, ._ }, + .{ ._, ._rcxz, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._x, .ado, .tmp6q, .memia(.dst0q, .tmp3, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .tmp6q, ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .lead(.tmp3, 8), ._, ._ }, + .{ ._, ._mp, .j, .@"1b", ._, ._, ._ }, + .{ .@"2:", ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ }, + .{ .@"1:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bmi2, .slow_incdec, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ }, + .{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"2f", ._, ._, ._ }, + .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ }, + .{ ._, ._mp, .j, .@"3f", ._, ._, ._ }, + .{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ }, + .{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ }, + .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ }, + .{ .@"2:", ._x, .mul, .tmp7q, .tmp6q, .leasi(.tmp1q, .@"8", .tmp2), ._ }, + .{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ }, + .{ ._, ._c, .in, .tmp2p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, + .{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .bmi2, null, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ }, + .{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"2f", ._, ._, ._ }, + .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ }, + .{ ._, ._mp, .j, .@"3f", ._, ._, ._ }, + .{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ }, + .{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ }, + .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ }, + .{ .@"2:", ._x, .mul, .tmp7q, .tmp6q, .leasi(.tmp1q, .@"8", .tmp2), ._ }, + .{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ }, + .{ ._, ._c, .in, .tmp2p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, + .{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, + .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, + .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", .slow_incdec, null, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ }, + .{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"2f", ._, ._, ._ }, + .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ }, + .{ ._, ._mp, .j, .@"3f", ._, ._, ._ }, + .{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ }, + .{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ }, + .{ .@"2:", ._, .mov, .tmp6q, .tmp3q, ._, ._ }, + .{ ._, ._, .mul, .leasi(.tmp1q, .@"8", .tmp2), ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ }, + .{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ }, + .{ ._, ._c, .in, .tmp2p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, + .{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .{ .remainder_int = .{ .of = .qword, .is = .qword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u8, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ }, + .{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ }, + .{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ }, + .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ }, + .{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ }, + .{ ._, ._nz, .j, .@"2f", ._, ._, ._ }, + .{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ }, + .{ ._, ._mp, .j, .@"3f", ._, ._, ._ }, + .{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ }, + .{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ }, + .{ .@"2:", ._, .mov, .tmp6q, .tmp3q, ._, ._ }, + .{ ._, ._, .mul, .leasi(.tmp1q, .@"8", .tmp2), ._, ._, ._ }, + .{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ }, + .{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ }, + .{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ }, + .{ ._, ._c, .in, .tmp2p, ._, ._, ._ }, + .{ ._, ._nz, .j, .@"1b", ._, ._, ._ }, + .{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ }, + .{ ._, ._c, .de, .tmp0d, ._, ._, ._ }, + .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ .required_features = .{ .f16c, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .word, .is = .word } }, @@ -6890,7 +7257,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ @tagName(air_tag), - cg.typeOf(bin_op.lhs).fmt(pt), + ty.fmt(pt), ops[0].tracking(cg), ops[1].tracking(cg), }), @@ -92700,7 +93067,7 @@ const Select = struct { const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] }; pseudo: { switch (inst[0]) { - .@"0:", .@"1:", .@"2:" => |label| s.emitLabel(label), + .@"0:", .@"1:", .@"2:", .@"3:" => |label| s.emitLabel(label), ._ => {}, .pseudo => break :pseudo, } @@ -93578,7 +93945,7 @@ const Select = struct { Select.Operand, Select.Operand, }; - const Label = enum { @"0:", @"1:", @"2:", @"_", pseudo }; + const Label = enum { @"0:", @"1:", @"2:", @"3:", @"_", pseudo }; const Operand = struct { flags: packed struct(u16) { tag: Tag, @@ -93609,6 +93976,7 @@ const Select = struct { ptr_size, ptr_bit_size, size, + src0_size, delta_size, delta_elem_size, size_add_elem_size, @@ -93641,6 +94009,8 @@ const Select = struct { const sub_size_div_8: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"8" }; const sub_size_div_4: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"4" }; const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" }; + const sub_src0_size_div_8: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .div, .rhs = .@"8" }; + const sub_src0_size: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .mul, .rhs = .@"1" }; const add_delta_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_size, .op = .div, .rhs = .@"8" }; const add_delta_elem_size: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .mul, .rhs = .@"1" }; const add_delta_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .div, .rhs = .@"8" }; @@ -93882,6 +94252,8 @@ const Select = struct { const @"1f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp1, .size = .none } }; const @"2b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp2, .size = .none } }; const @"2f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp2, .size = .none } }; + const @"3b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp3, .size = .none } }; + const @"3f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp3, .size = .none } }; const tmp0b: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0b }; const tmp0w: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0w }; @@ -94070,6 +94442,13 @@ const Select = struct { .base = base, }; } + fn leaad(base: Ref.Sized, adjust: Adjust, disp: i32) Select.Operand { + return .{ + .flags = .{ .tag = .lea, .adjust = adjust }, + .base = base, + .imm = disp, + }; + } fn lead(base: Ref.Sized, disp: i32) Select.Operand { return .{ .flags = .{ .tag = .lea }, @@ -94226,6 +94605,7 @@ const Select = struct { .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8), .ptr_bit_size => s.cg.target.ptrBitWidth(), .size => @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu)), + .src0_size => @intCast(Select.Operand.Ref.src0.typeOf(s).abiSize(s.cg.pt.zcu)), .delta_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu))) - @as(SignedImm, @intCast(op.index.ref.typeOf(s).abiSize(s.cg.pt.zcu)))), .delta_elem_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))) - diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index bd5efec81c..e6f3f6541a 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -88,13 +88,32 @@ pub fn emitMir(emit: *Emit) Error!void { lowered_relocs[0].lowered_inst_index == lowered_index) : ({ lowered_relocs = lowered_relocs[1..]; }) switch (lowered_relocs[0].target) { - .inst => |target| try relocs.append(emit.lower.allocator, .{ - .source = start_offset, - .source_offset = end_offset - 4, - .target = target, - .target_offset = lowered_relocs[0].off, - .length = @intCast(end_offset - start_offset), - }), + .inst => |target| { + const inst_length: u4 = @intCast(end_offset - start_offset); + const reloc_offset, const reloc_length = reloc_offset_length: { + var reloc_offset = inst_length; + var op_index: usize = lowered_inst.ops.len; + while (true) { + op_index -= 1; + const op = lowered_inst.encoding.data.ops[op_index]; + if (op == .none) continue; + const enc_length: u4 = @intCast( + std.math.divCeil(u7, @intCast(op.immBitSize()), 8) catch unreachable, + ); + reloc_offset -= enc_length; + if (op_index == lowered_relocs[0].op_index) + break :reloc_offset_length .{ reloc_offset, enc_length }; + } + }; + try relocs.append(emit.lower.allocator, .{ + .inst_offset = start_offset, + .inst_length = inst_length, + .source_offset = reloc_offset, + .source_length = reloc_length, + .target = target, + .target_offset = lowered_relocs[0].off, + }); + }, .table => try table_relocs.append(emit.lower.allocator, .{ .source_offset = end_offset - 4, .target_offset = lowered_relocs[0].off, @@ -409,7 +428,7 @@ pub fn emitMir(emit: *Emit) Error!void { } } }; }, .pseudo_dbg_local_am => loc: { - const mem = emit.lower.mem(mir_inst.data.ax.payload); + const mem = emit.lower.mem(undefined, mir_inst.data.ax.payload); break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ base: { loc_buf[0] = switch (mem.base()) { @@ -466,15 +485,18 @@ pub fn emitMir(emit: *Emit) Error!void { } } } - { - // TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size. - // This should be reversed like it is done in aarch64 MIR emit code: start with the smallest - // possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution - // until the entire decl is correctly emitted with all JMP/CALL instructions within range. - for (relocs.items) |reloc| { - const target = code_offset_mapping[reloc.target]; - const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.source + reloc.length)) + reloc.target_offset; - std.mem.writeInt(i32, emit.code.items[reloc.source_offset..][0..4], @intCast(disp), .little); + for (relocs.items) |reloc| { + const target = code_offset_mapping[reloc.target]; + const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.inst_offset + reloc.inst_length)) + reloc.target_offset; + const inst_bytes = emit.code.items[reloc.inst_offset..][0..reloc.inst_length]; + switch (reloc.source_length) { + else => unreachable, + inline 1, 4 => |source_length| std.mem.writeInt( + @Type(.{ .int = .{ .signedness = .signed, .bits = @as(u16, 8) * source_length } }), + inst_bytes[reloc.source_offset..][0..source_length], + @intCast(disp), + .little, + ), } } if (emit.lower.mir.table.len > 0) { @@ -511,15 +533,17 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) Error { const Reloc = struct { /// Offset of the instruction. - source: u32, + inst_offset: u32, + /// Length of the instruction. + inst_length: u4, /// Offset of the relocation within the instruction. - source_offset: u32, + source_offset: u4, + /// Length of the relocation. + source_length: u4, /// Target of the relocation. target: Mir.Inst.Index, - /// Offset from the target instruction. + /// Offset from the target. target_offset: i32, - /// Length of the instruction. - length: u5, }; const TableReloc = struct { diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 81fa28dcd5..c4f7a310ee 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -304,20 +304,20 @@ pub const Mnemonic = enum { jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz, lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne, lods, lodsb, lodsd, lodsq, lodsw, - lsl, ltr, lzcnt, + lsl, ltr, mfence, mov, movbe, movs, movsb, movsd, movsq, movsw, movsx, movsxd, movzx, mul, neg, nop, not, @"or", out, outs, outsb, outsd, outsw, - pause, pop, popcnt, popf, popfd, popfq, push, pushfq, + pause, pop, popf, popfd, popfq, push, pushfq, rcl, rcr, rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp, - ret, rol, ror, rorx, rsm, - sahf, sal, sar, sarx, sbb, + ret, rol, ror, rsm, + sahf, sal, sar, sbb, scas, scasb, scasd, scasq, scasw, senduipi, serialize, - shl, shld, shlx, shr, shrd, shrx, + shl, shld, shr, shrd, stac, stc, std, sti, str, stui, sub, swapgs, syscall, sysenter, sysexit, sysret, seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae, @@ -433,6 +433,8 @@ pub const Mnemonic = enum { roundpd, roundps, roundsd, roundss, // SSE4.2 crc32, pcmpgtq, + // ABM + lzcnt, popcnt, // PCLMUL pclmulqdq, // AES @@ -440,7 +442,6 @@ pub const Mnemonic = enum { // SHA sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2, // AVX - andn, bextr, blsi, blsmsk, blsr, bzhi, tzcnt, vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps, vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist, vandnpd, vandnps, vandpd, vandps, @@ -506,6 +507,10 @@ pub const Mnemonic = enum { vtestpd, vtestps, vucomisd, vucomiss, vunpckhpd, vunpckhps, vunpcklpd, vunpcklps, vxorpd, vxorps, + // BMI + andn, bextr, blsi, blsmsk, blsr, tzcnt, + // BMI2 + bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx, // F16C vcvtph2ps, vcvtps2ph, // FMA diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index f6e34ce2bf..6a8b5efcf8 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -10,32 +10,38 @@ mir: Mir, cc: std.builtin.CallingConvention, err_msg: ?*Zcu.ErrorMsg = null, src_loc: Zcu.LazySrcLoc, -result_insts_len: u8 = undefined, -result_relocs_len: u8 = undefined, -result_insts: [ - @max( - 1, // non-pseudo instructions - 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode - 2, // cmovcc: cmovcc \ cmovcc - 3, // setcc: setcc \ setcc \ logicop - 2, // jcc: jcc \ jcc - pseudo_probe_align_insts, - pseudo_probe_adjust_unrolled_max_insts, - pseudo_probe_adjust_setup_insts, - pseudo_probe_adjust_loop_insts, - abi.Win64.callee_preserved_regs.len * 2, // push_regs/pop_regs - abi.SysV.callee_preserved_regs.len * 2, // push_regs/pop_regs - ) -]Instruction = undefined, -result_relocs: [ - @max( - 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea - 2, // jcc: jcc \ jcc - 2, // test \ jcc \ probe \ sub \ jmp - 1, // probe \ sub \ jcc - 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode - ) -]Reloc = undefined, +result_insts_len: ResultInstIndex = undefined, +result_insts: [max_result_insts]Instruction = undefined, +result_relocs_len: ResultRelocIndex = undefined, +result_relocs: [max_result_relocs]Reloc = undefined, + +const max_result_insts = @max( + 1, // non-pseudo instructions + 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode + 2, // cmovcc: cmovcc \ cmovcc + 3, // setcc: setcc \ setcc \ logicop + 2, // jcc: jcc \ jcc + pseudo_probe_align_insts, + pseudo_probe_adjust_unrolled_max_insts, + pseudo_probe_adjust_setup_insts, + pseudo_probe_adjust_loop_insts, + abi.Win64.callee_preserved_regs.len * 2, // push_regs/pop_regs + abi.SysV.callee_preserved_regs.len * 2, // push_regs/pop_regs +); +const max_result_relocs = @max( + 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea + 2, // jcc: jcc \ jcc + 2, // test \ jcc \ probe \ sub \ jmp + 1, // probe \ sub \ jcc + 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode +); + +const ResultInstIndex = std.math.IntFittingRange(0, max_result_insts - 1); +const ResultRelocIndex = std.math.IntFittingRange(0, max_result_relocs - 1); +const InstOpIndex = std.math.IntFittingRange( + 0, + @typeInfo(@FieldType(Instruction, "ops")).array.len - 1, +); pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp pub const pseudo_probe_adjust_unrolled_max_insts = @@ -51,7 +57,8 @@ pub const Error = error{ }; pub const Reloc = struct { - lowered_inst_index: u8, + lowered_inst_index: ResultInstIndex, + op_index: InstOpIndex, target: Target, off: i32, @@ -114,11 +121,11 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { assert(inst.data.rx.fixes == ._); try lower.emit(.none, .cmovnz, &.{ .{ .reg = inst.data.rx.r1 }, - .{ .mem = lower.mem(inst.data.rx.payload) }, + .{ .mem = lower.mem(1, inst.data.rx.payload) }, }); try lower.emit(.none, .cmovp, &.{ .{ .reg = inst.data.rx.r1 }, - .{ .mem = lower.mem(inst.data.rx.payload) }, + .{ .mem = lower.mem(1, inst.data.rx.payload) }, }); }, .pseudo_set_z_and_np_r => { @@ -137,13 +144,13 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_set_z_and_np_m => { assert(inst.data.rx.fixes == ._); try lower.emit(.none, .setz, &.{ - .{ .mem = lower.mem(inst.data.rx.payload) }, + .{ .mem = lower.mem(0, inst.data.rx.payload) }, }); try lower.emit(.none, .setnp, &.{ .{ .reg = inst.data.rx.r1 }, }); try lower.emit(.none, .@"and", &.{ - .{ .mem = lower.mem(inst.data.rx.payload) }, + .{ .mem = lower.mem(0, inst.data.rx.payload) }, .{ .reg = inst.data.rx.r1 }, }); }, @@ -163,32 +170,32 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_set_nz_or_p_m => { assert(inst.data.rx.fixes == ._); try lower.emit(.none, .setnz, &.{ - .{ .mem = lower.mem(inst.data.rx.payload) }, + .{ .mem = lower.mem(0, inst.data.rx.payload) }, }); try lower.emit(.none, .setp, &.{ .{ .reg = inst.data.rx.r1 }, }); try lower.emit(.none, .@"or", &.{ - .{ .mem = lower.mem(inst.data.rx.payload) }, + .{ .mem = lower.mem(0, inst.data.rx.payload) }, .{ .reg = inst.data.rx.r1 }, }); }, .pseudo_j_z_and_np_inst => { assert(inst.data.inst.fixes == ._); try lower.emit(.none, .jnz, &.{ - .{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) }, + .{ .imm = lower.reloc(0, .{ .inst = index + 1 }, 0) }, }); try lower.emit(.none, .jnp, &.{ - .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) }, + .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) }, }); }, .pseudo_j_nz_or_p_inst => { assert(inst.data.inst.fixes == ._); try lower.emit(.none, .jnz, &.{ - .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) }, + .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) }, }); try lower.emit(.none, .jp, &.{ - .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) }, + .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) }, }); }, @@ -198,7 +205,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .{ .imm = .s(@bitCast(inst.data.ri.i)) }, }); try lower.emit(.none, .jz, &.{ - .{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) }, + .{ .imm = lower.reloc(0, .{ .inst = index + 1 }, 0) }, }); try lower.emit(.none, .lea, &.{ .{ .reg = inst.data.ri.r1 }, @@ -214,7 +221,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .{ .reg = inst.data.ri.r1.to32() }, }); try lower.emit(.none, .jmp, &.{ - .{ .imm = lower.reloc(.{ .inst = index }, 0) }, + .{ .imm = lower.reloc(0, .{ .inst = index }, 0) }, }); assert(lower.result_insts_len == pseudo_probe_align_insts); }, @@ -260,7 +267,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .{ .imm = .s(page_size) }, }); try lower.emit(.none, .jae, &.{ - .{ .imm = lower.reloc(.{ .inst = index }, 0) }, + .{ .imm = lower.reloc(0, .{ .inst = index }, 0) }, }); assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts); }, @@ -382,21 +389,22 @@ pub fn imm(lower: *const Lower, ops: Mir.Inst.Ops, i: u32) Immediate { }; } -pub fn mem(lower: *Lower, payload: u32) Memory { +pub fn mem(lower: *Lower, op_index: InstOpIndex, payload: u32) Memory { var m = lower.mir.resolveFrameLoc(lower.mir.extraData(Mir.Memory, payload).data).decode(); switch (m) { .sib => |*sib| switch (sib.base) { else => {}, - .table => sib.disp = lower.reloc(.table, sib.disp).signed, + .table => sib.disp = lower.reloc(op_index, .table, sib.disp).signed, }, else => {}, } return m; } -fn reloc(lower: *Lower, target: Reloc.Target, off: i32) Immediate { +fn reloc(lower: *Lower, op_index: InstOpIndex, target: Reloc.Target, off: i32) Immediate { lower.result_relocs[lower.result_relocs_len] = .{ .lowered_inst_index = lower.result_insts_len, + .op_index = op_index, .target = target, .off = off, }; @@ -409,7 +417,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) var emit_mnemonic = mnemonic; var emit_ops_storage: [4]Operand = undefined; const emit_ops = emit_ops_storage[0..ops.len]; - for (emit_ops, ops) |*emit_op, op| { + for (emit_ops, ops, 0..) |*emit_op, op, op_index| { emit_op.* = switch (op) { else => op, .mem => |mem_op| switch (mem_op.base()) { @@ -428,20 +436,20 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) if (lower.pic) { // Here, we currently assume local dynamic TLS vars, and so // we emit LD model. - _ = lower.reloc(.{ .linker_tlsld = sym_index }, 0); + _ = lower.reloc(1, .{ .linker_tlsld = sym_index }, 0); lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{ .{ .reg = .rdi }, .{ .mem = Memory.initRip(.none, 0) }, }, lower.target); lower.result_insts_len += 1; - _ = lower.reloc(.{ + _ = lower.reloc(0, .{ .linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null), }, 0); lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{ .{ .imm = .s(0) }, }, lower.target); lower.result_insts_len += 1; - _ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0); + _ = lower.reloc(@intCast(op_index), .{ .linker_dtpoff = sym_index }, 0); emit_mnemonic = .lea; break :op .{ .mem = Memory.initSib(.none, .{ .base = .{ .reg = .rax }, @@ -454,7 +462,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) }, }, lower.target); lower.result_insts_len += 1; - _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); + _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0); emit_mnemonic = .lea; break :op .{ .mem = Memory.initSib(.none, .{ .base = .{ .reg = .rax }, @@ -463,15 +471,17 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) } } - _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); if (lower.pic) switch (mnemonic) { - .lea => if (elf_sym.flags.is_extern_ptr) { + .lea => { + _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0); + if (!elf_sym.flags.is_extern_ptr) break :op .{ .mem = Memory.initRip(.none, 0) }; emit_mnemonic = .mov; break :op .{ .mem = Memory.initRip(.ptr, 0) }; - } else break :op .{ .mem = Memory.initRip(.none, 0) }, + }, .mov => { if (elf_sym.flags.is_extern_ptr) { const reg = ops[0].reg; + _ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0); lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = reg.to64() }, .{ .mem = Memory.initRip(.qword, 0) }, @@ -481,10 +491,13 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) .reg = reg.to64(), } }) }; } + _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0); break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }; }, else => unreachable, - } else switch (mnemonic) { + }; + _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0); + switch (mnemonic) { .call => break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ .base = .{ .reg = .ds }, }) }, @@ -502,7 +515,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) const macho_sym = zo.symbols.items[sym_index]; if (macho_sym.flags.tlv) { - _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); + _ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0); lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = .rdi }, .{ .mem = Memory.initRip(.ptr, 0) }, @@ -516,15 +529,17 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) break :op .{ .reg = .rax }; } - _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); break :op switch (mnemonic) { - .lea => if (macho_sym.flags.is_extern_ptr) { + .lea => { + _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0); + if (!macho_sym.flags.is_extern_ptr) break :op .{ .mem = Memory.initRip(.none, 0) }; emit_mnemonic = .mov; break :op .{ .mem = Memory.initRip(.ptr, 0) }; - } else break :op .{ .mem = Memory.initRip(.none, 0) }, + }, .mov => { if (macho_sym.flags.is_extern_ptr) { const reg = ops[0].reg; + _ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0); lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = reg.to64() }, .{ .mem = Memory.initRip(.qword, 0) }, @@ -534,6 +549,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) .reg = reg.to64(), } }) }; } + _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0); break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }; }, else => unreachable, @@ -550,7 +566,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) } fn generic(lower: *Lower, inst: Mir.Inst) Error!void { - @setEvalBranchQuota(2_400); + @setEvalBranchQuota(2_500); const fixes = switch (inst.ops) { .none => inst.data.none.fixes, .inst => inst.data.inst.fixes, @@ -595,7 +611,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { }, switch (inst.ops) { .none => &.{}, .inst => &.{ - .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) }, + .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) }, }, .i_s, .i_u => &.{ .{ .imm = lower.imm(inst.ops, inst.data.i.i) }, @@ -642,10 +658,10 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .{ .imm = lower.imm(inst.ops, inst.data.rri.i) }, }, .m => &.{ - .{ .mem = lower.mem(inst.data.x.payload) }, + .{ .mem = lower.mem(0, inst.data.x.payload) }, }, .mi_s, .mi_u => &.{ - .{ .mem = lower.mem(inst.data.x.payload + 1) }, + .{ .mem = lower.mem(0, inst.data.x.payload + 1) }, .{ .imm = lower.imm( inst.ops, lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm, @@ -653,64 +669,64 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { }, .rm => &.{ .{ .reg = inst.data.rx.r1 }, - .{ .mem = lower.mem(inst.data.rx.payload) }, + .{ .mem = lower.mem(1, inst.data.rx.payload) }, }, .rmr => &.{ .{ .reg = inst.data.rrx.r1 }, - .{ .mem = lower.mem(inst.data.rrx.payload) }, + .{ .mem = lower.mem(1, inst.data.rrx.payload) }, .{ .reg = inst.data.rrx.r2 }, }, .rmi => &.{ .{ .reg = inst.data.rix.r1 }, - .{ .mem = lower.mem(inst.data.rix.payload) }, + .{ .mem = lower.mem(1, inst.data.rix.payload) }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, .rmi_s, .rmi_u => &.{ .{ .reg = inst.data.rx.r1 }, - .{ .mem = lower.mem(inst.data.rx.payload + 1) }, + .{ .mem = lower.mem(1, inst.data.rx.payload + 1) }, .{ .imm = lower.imm( inst.ops, lower.mir.extraData(Mir.Imm32, inst.data.rx.payload).data.imm, ) }, }, .mr => &.{ - .{ .mem = lower.mem(inst.data.rx.payload) }, + .{ .mem = lower.mem(0, inst.data.rx.payload) }, .{ .reg = inst.data.rx.r1 }, }, .mrr => &.{ - .{ .mem = lower.mem(inst.data.rrx.payload) }, + .{ .mem = lower.mem(0, inst.data.rrx.payload) }, .{ .reg = inst.data.rrx.r1 }, .{ .reg = inst.data.rrx.r2 }, }, .mri => &.{ - .{ .mem = lower.mem(inst.data.rix.payload) }, + .{ .mem = lower.mem(0, inst.data.rix.payload) }, .{ .reg = inst.data.rix.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, .rrm => &.{ .{ .reg = inst.data.rrx.r1 }, .{ .reg = inst.data.rrx.r2 }, - .{ .mem = lower.mem(inst.data.rrx.payload) }, + .{ .mem = lower.mem(2, inst.data.rrx.payload) }, }, .rrmr => &.{ .{ .reg = inst.data.rrrx.r1 }, .{ .reg = inst.data.rrrx.r2 }, - .{ .mem = lower.mem(inst.data.rrrx.payload) }, + .{ .mem = lower.mem(2, inst.data.rrrx.payload) }, .{ .reg = inst.data.rrrx.r3 }, }, .rrmi => &.{ .{ .reg = inst.data.rrix.r1 }, .{ .reg = inst.data.rrix.r2 }, - .{ .mem = lower.mem(inst.data.rrix.payload) }, + .{ .mem = lower.mem(2, inst.data.rrix.payload) }, .{ .imm = lower.imm(inst.ops, inst.data.rrix.i) }, }, .extern_fn_reloc, .rel => &.{ - .{ .imm = lower.reloc(.{ .linker_extern_fn = inst.data.reloc.sym_index }, inst.data.reloc.off) }, + .{ .imm = lower.reloc(0, .{ .linker_extern_fn = inst.data.reloc.sym_index }, inst.data.reloc.off) }, }, .got_reloc, .direct_reloc, .import_reloc => ops: { const reg = inst.data.rx.r1; const extra = lower.mir.extraData(bits.SymbolOffset, inst.data.rx.payload).data; - _ = lower.reloc(switch (inst.ops) { + _ = lower.reloc(1, switch (inst.ops) { .got_reloc => .{ .linker_got = extra.sym_index }, .direct_reloc => .{ .linker_direct = extra.sym_index }, .import_reloc => .{ .linker_import = extra.sym_index }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index b03e363903..c9482d0890 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -100,6 +100,8 @@ pub const Inst = struct { /// ___ Division _d, + /// ___ Without Affecting Flags + _x, /// ___ Left _l, /// ___ Left Double @@ -483,6 +485,7 @@ pub const Inst = struct { /// ASCII adjust al after subtraction aa, /// Add with carry + /// Unsigned integer addition of two operands with carry flag adc, /// Add /// Add packed integers @@ -1162,10 +1165,8 @@ pub const Inst = struct { fmadd231, // ADX - /// Unsigned integer addition of two operands with carry flag - adcx, /// Unsigned integer addition of two operands with overflow flag - adox, + ado, // AESKLE /// Encode 128-bit key with key locker diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index f8d7779a8d..2dcacd9ad5 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -405,9 +405,9 @@ pub const table = [_]Entry{ .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, - .{ .jcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .short, .@"32bit" }, - .{ .jecxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"32bit" }, - .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"64bit" }, + .{ .jcxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .short, .@"32bit" }, + .{ .jecxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .none, .@"32bit" }, + .{ .jrcxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .none, .@"64bit" }, .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, @@ -477,10 +477,6 @@ pub const table = [_]Entry{ .{ .ltr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 3, .none, .none }, - .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt }, - .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt }, - .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt }, - .{ .mfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none }, @@ -630,10 +626,6 @@ pub const table = [_]Entry{ .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none }, .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none }, - .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt }, - .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt }, - .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt }, - .{ .popf, .z, &.{}, &.{ 0x9d }, 0, .short, .none }, .{ .popfd, .z, &.{}, &.{ 0x9d }, 0, .none, .@"32bit" }, .{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .@"64bit" }, @@ -1738,6 +1730,15 @@ pub const table = [_]Entry{ .{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 }, + // ABM + .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt }, + .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt }, + .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt }, + + .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt }, + .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt }, + .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt }, + // PCLMUL .{ .pclmulqdq, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .none, .pclmul }, @@ -1771,38 +1772,6 @@ pub const table = [_]Entry{ .{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha }, // AVX - .{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi }, - .{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi }, - - .{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi }, - .{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi }, - - .{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi }, - .{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi }, - - .{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi }, - .{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi }, - - .{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi }, - .{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi }, - - .{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 }, - .{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 }, - - .{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 }, - .{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 }, - - .{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, - .{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, - .{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, - .{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, - .{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, - .{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, - - .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi }, - .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi }, - .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi }, - .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, @@ -2307,6 +2276,49 @@ pub const table = [_]Entry{ .{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx }, .{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx }, + // BMI + .{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi }, + .{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi }, + + .{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi }, + .{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi }, + + .{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi }, + .{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi }, + + .{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi }, + .{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi }, + + .{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi }, + .{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi }, + + .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi }, + .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi }, + .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi }, + + // BMI2 + .{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 }, + .{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 }, + + .{ .mulx, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf6 }, 0, .vex_lz_w0, .bmi2 }, + .{ .mulx, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf6 }, 0, .vex_lz_w1, .bmi2 }, + + .{ .pdep, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 }, + .{ .pdep, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 }, + + .{ .pext, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 }, + .{ .pext, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 }, + + .{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 }, + .{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 }, + + .{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, + .{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, + .{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 }, + .{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, + .{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, + .{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 }, + // F16C .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c }, .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c }, diff --git a/test/behavior/x86_64/build.zig b/test/behavior/x86_64/build.zig index d18d5eb5ee..967d061be1 100644 --- a/test/behavior/x86_64/build.zig +++ b/test/behavior/x86_64/build.zig @@ -93,6 +93,11 @@ pub fn build(b: *std.Build) void { .cpu_arch = .x86_64, .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, }, + .{ + .cpu_arch = .x86_64, + .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, + .cpu_features_add = std.Target.x86.featureSet(&.{.adx}), + }, .{ .cpu_arch = .x86_64, .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 }, diff --git a/test/behavior/x86_64/math.zig b/test/behavior/x86_64/math.zig index f180ade402..bd498e3596 100644 --- a/test/behavior/x86_64/math.zig +++ b/test/behavior/x86_64/math.zig @@ -44,6 +44,17 @@ fn AddOneBit(comptime Type: type) type { .vector => |vector| @Vector(vector.len, ResultScalar), }; } +fn DoubleBits(comptime Type: type) type { + const ResultScalar = switch (@typeInfo(Scalar(Type))) { + .int => |int| @Type(.{ .int = .{ .signedness = int.signedness, .bits = int.bits * 2 } }), + .float => Scalar(Type), + else => @compileError(@typeName(Type)), + }; + return switch (@typeInfo(Type)) { + else => ResultScalar, + .vector => |vector| @Vector(vector.len, ResultScalar), + }; +} // inline to avoid a runtime `@splat` inline fn splat(comptime Type: type, scalar: Scalar(Type)) Type { return switch (@typeInfo(Type)) { @@ -16216,6 +16227,8 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela ); } fn testInts() !void { + try testArgs(i4, 0x3, 0x2); + try testArgs(u4, 0xe, 0x6); try testArgs(i8, 0x48, 0x6c); try testArgs(u8, 0xbb, 0x43); try testArgs(i16, -0x0fdf, 0x302e); @@ -18993,6 +19006,15 @@ test subUnsafe { try test_sub_unsafe.testFloatVectors(); } +inline fn mulUnsafe(comptime Type: type, lhs: Type, rhs: Type) DoubleBits(Type) { + @setRuntimeSafety(false); + return @as(DoubleBits(Type), lhs) * rhs; +} +test mulUnsafe { + const test_mul_unsafe = binary(mulUnsafe, .{}); + try test_mul_unsafe.testInts(); +} + inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs * rhs) { if (@inComptime() and @typeInfo(Type) == .vector) { // workaround https://github.com/ziglang/zig/issues/22743