x86_64: rewrite unsafe scalar int multiplication

This commit is contained in:
Jacob Young 2025-02-15 03:45:08 -05:00
parent 5db585fcde
commit dcc9fe322e
8 changed files with 616 additions and 151 deletions

View File

@ -6036,10 +6036,377 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.sub_safe => unreachable, .sub_safe => unreachable,
.mul, .mul_optimized => |air_tag| if (use_old) try cg.airMulDivBinOp(inst, .mul) else fallback: { .mul, .mul_optimized => |air_tag| if (use_old) try cg.airMulDivBinOp(inst, .mul) else fallback: {
const bin_op = air_datas[@intFromEnum(inst)].bin_op; const bin_op = air_datas[@intFromEnum(inst)].bin_op;
if (cg.floatBits(cg.typeOf(bin_op.lhs).scalarType(zcu)) == null) break :fallback try cg.airMulDivBinOp(inst, .mul); const ty = cg.typeOf(bin_op.lhs);
if (ty.isVector(zcu) and cg.floatBits(ty.childType(zcu)) == null) break :fallback try cg.airMulDivBinOp(inst, .mul);
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
var res: [1]Temp = undefined; var res: [1]Temp = undefined;
cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{ cg.select(&res, &.{ty}, &ops, comptime &.{ .{
.src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
.patterns = &.{
.{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
.{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
.{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .i_, .mul, .src1b, ._, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
.patterns = &.{
.{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
.{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
.{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mul, .src1b, ._, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any },
.patterns = &.{
.{ .src = .{ .mem, .imm16, .none } },
.{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
.{ .src = .{ .to_gpr, .imm16, .none } },
.{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .i_, .mul, .dst0w, .src0w, .src1w, ._ },
} },
}, .{
.src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any },
.patterns = &.{
.{ .src = .{ .to_mut_gpr, .mem, .none } },
.{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
.{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .i_, .mul, .dst0w, .src1w, ._, ._ },
} },
}, .{
.src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any },
.patterns = &.{
.{ .src = .{ .mem, .imm32, .none } },
.{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
.{ .src = .{ .to_gpr, .imm32, .none } },
.{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .i_, .mul, .dst0d, .src0d, .src1d, ._ },
} },
}, .{
.src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any },
.patterns = &.{
.{ .src = .{ .to_mut_gpr, .mem, .none } },
.{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
.{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .i_, .mul, .dst0d, .src1d, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
.patterns = &.{
.{ .src = .{ .mem, .simm32, .none } },
.{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
.{ .src = .{ .to_gpr, .simm32, .none } },
.{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .i_, .mul, .dst0q, .src0q, .src1q, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
.patterns = &.{
.{ .src = .{ .to_mut_gpr, .mem, .none } },
.{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
.{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, .i_, .mul, .dst0q, .src1q, ._, ._ },
} },
}, .{
// Select entry for multiplying two xword (128-bit) integers held in memory.
// Only two qwords are written to the destination (low at offset 0, high at
// offset 8), i.e. the product is truncated to 128 bits:
//   dst.lo = lo64(src0.lo * src1.lo)
//   dst.hi = hi64(src0.lo * src1.lo) + src0.lo * src1.hi + src1.lo * src0.hi
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .int = .xword }, .{ .int = .xword }, .any },
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem, .none } },
},
.extra_temps = .{
// tmp0 is pinned to rax and tmp1 to rdx: the one-operand x86 `mul`
// implicitly reads rax and writes the double-width product to rdx:rax.
.{ .type = .u64, .kind = .{ .reg = .rax } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
// rax = low qword of src0.
.{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ },
// rdx:rax = src0.lo * src1.lo (unsigned widening multiply).
.{ ._, ._, .mul, .src1q, ._, ._, ._ },
// Store the low qword of the result; rdx keeps the running high qword.
.{ ._, ._, .mov, .dst0q, .tmp0q, ._, ._ },
// First cross term: rax = src0.lo * (qword at src1 + 8).
// NOTE(review): `.i_, .mul` presumably encodes the two-operand `imul`,
// and `memd(x, 8)` the same memory operand displaced by 8 bytes — confirm.
.{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ },
.{ ._, .i_, .mul, .tmp0q, .memd(.src1q, 8), ._, ._ },
.{ ._, ._, .add, .tmp1q, .tmp0q, ._, ._ },
// Second cross term: rax = src1.lo * (qword at src0 + 8).
.{ ._, ._, .mov, .tmp0q, .src1q, ._, ._ },
.{ ._, .i_, .mul, .tmp0q, .memd(.src0q, 8), ._, ._ },
.{ ._, ._, .add, .tmp1q, .tmp0q, ._, ._ },
// Store the accumulated high qword at dst + 8.
.{ ._, ._, .mov, .memd(.dst0q, 8), .tmp1q, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", .bmi2, .adx, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem, .none } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.{ .type = .isize, .kind = .{ .reg = .rcx } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
.{ ._, ._, .@"or", .tmp2q, .memi(.src0q, .tmp0), ._, ._ },
.{ ._, ._z, .j, .@"2f", ._, ._, ._ },
.{ ._, ._, .lea, .tmp3p, .leaad(.tmp0, .sub_src0_size, 8), ._, ._ },
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
.{ .@"1:", ._x, .mul, .tmp6q, .tmp5q, .leai(.tmp1q, .tmp3), ._ },
.{ ._, ._x, .adc, .tmp5q, .tmp4q, ._, ._ },
.{ ._, ._, .mov, .memiad(.dst0q, .tmp3, .add_size, -8), .tmp5q, ._, ._ },
.{ ._, ._rcxz, .j, .@"1f", ._, ._, ._ },
.{ ._, ._x, .ado, .tmp6q, .memia(.dst0q, .tmp3, .add_size), ._, ._ },
.{ ._, ._, .mov, .tmp4q, .tmp6q, ._, ._ },
.{ ._, ._, .lea, .tmp3p, .lead(.tmp3, 8), ._, ._ },
.{ ._, ._mp, .j, .@"1b", ._, ._, ._ },
.{ .@"2:", ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
.{ .@"1:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", .bmi2, .slow_incdec, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem, .none } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
.{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
.{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
.{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
.{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
.{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
.{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
.{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
.{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
.{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
.{ .@"2:", ._x, .mul, .tmp7q, .tmp6q, .leasi(.tmp1q, .@"8", .tmp2), ._ },
.{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
.{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
.{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
.{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
.{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", .bmi2, null, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem, .none } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
.{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
.{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
.{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
.{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
.{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
.{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
.{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
.{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
.{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
.{ .@"2:", ._x, .mul, .tmp7q, .tmp6q, .leasi(.tmp1q, .@"8", .tmp2), ._ },
.{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
.{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
.{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
.{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
.{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", .slow_incdec, null, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem, .none } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .reg = .rax } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
.{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
.{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
.{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
.{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
.{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
.{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
.{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
.{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
.{ .@"2:", ._, .mov, .tmp6q, .tmp3q, ._, ._ },
.{ ._, ._, .mul, .leasi(.tmp1q, .@"8", .tmp2), ._, ._, ._ },
.{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
.{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
.{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
.{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
.{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
.{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .to_mem, .none } },
},
.extra_temps = .{
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .reg = .rax } },
.{ .type = .u64, .kind = .{ .reg = .rdx } },
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
.{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
.{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
.{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
.{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
.{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
.{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
.{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
.{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
.{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
.{ .@"2:", ._, .mov, .tmp6q, .tmp3q, ._, ._ },
.{ ._, ._, .mul, .leasi(.tmp1q, .@"8", .tmp2), ._, ._, ._ },
.{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
.{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
.{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
.{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
.{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
.{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
.{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
}, .{
.required_features = .{ .f16c, null, null, null }, .required_features = .{ .f16c, null, null, null },
.src_constraints = .{ .src_constraints = .{
.{ .scalar_float = .{ .of = .word, .is = .word } }, .{ .scalar_float = .{ .of = .word, .is = .word } },
@ -6890,7 +7257,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
} }) catch |err| switch (err) { } }) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
@tagName(air_tag), @tagName(air_tag),
cg.typeOf(bin_op.lhs).fmt(pt), ty.fmt(pt),
ops[0].tracking(cg), ops[0].tracking(cg),
ops[1].tracking(cg), ops[1].tracking(cg),
}), }),
@ -92700,7 +93067,7 @@ const Select = struct {
const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] }; const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] };
pseudo: { pseudo: {
switch (inst[0]) { switch (inst[0]) {
.@"0:", .@"1:", .@"2:" => |label| s.emitLabel(label), .@"0:", .@"1:", .@"2:", .@"3:" => |label| s.emitLabel(label),
._ => {}, ._ => {},
.pseudo => break :pseudo, .pseudo => break :pseudo,
} }
@ -93578,7 +93945,7 @@ const Select = struct {
Select.Operand, Select.Operand,
Select.Operand, Select.Operand,
}; };
const Label = enum { @"0:", @"1:", @"2:", @"_", pseudo }; const Label = enum { @"0:", @"1:", @"2:", @"3:", @"_", pseudo };
const Operand = struct { const Operand = struct {
flags: packed struct(u16) { flags: packed struct(u16) {
tag: Tag, tag: Tag,
@ -93609,6 +93976,7 @@ const Select = struct {
ptr_size, ptr_size,
ptr_bit_size, ptr_bit_size,
size, size,
src0_size,
delta_size, delta_size,
delta_elem_size, delta_elem_size,
size_add_elem_size, size_add_elem_size,
@ -93641,6 +94009,8 @@ const Select = struct {
const sub_size_div_8: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"8" }; const sub_size_div_8: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"8" };
const sub_size_div_4: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"4" }; const sub_size_div_4: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"4" };
const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" }; const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" };
const sub_src0_size_div_8: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .div, .rhs = .@"8" };
const sub_src0_size: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .mul, .rhs = .@"1" };
const add_delta_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_size, .op = .div, .rhs = .@"8" }; const add_delta_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_size, .op = .div, .rhs = .@"8" };
const add_delta_elem_size: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .mul, .rhs = .@"1" }; const add_delta_elem_size: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .mul, .rhs = .@"1" };
const add_delta_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .div, .rhs = .@"8" }; const add_delta_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .div, .rhs = .@"8" };
@ -93882,6 +94252,8 @@ const Select = struct {
const @"1f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp1, .size = .none } }; const @"1f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp1, .size = .none } };
const @"2b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp2, .size = .none } }; const @"2b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp2, .size = .none } };
const @"2f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp2, .size = .none } }; const @"2f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp2, .size = .none } };
const @"3b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp3, .size = .none } };
const @"3f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp3, .size = .none } };
const tmp0b: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0b }; const tmp0b: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0b };
const tmp0w: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0w }; const tmp0w: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0w };
@ -94070,6 +94442,13 @@ const Select = struct {
.base = base, .base = base,
}; };
} }
/// Builds a `.lea`-tagged operand from `base` that carries both a size
/// `adjust` (stored in `flags.adjust`) and a constant displacement `disp`
/// (stored in `imm`).
/// NOTE(review): how `adjust` and `imm` are combined into the final address
/// is decided where the operand is lowered, which is not shown here — confirm
/// they are summed before relying on it.
fn leaad(base: Ref.Sized, adjust: Adjust, disp: i32) Select.Operand {
return .{
.flags = .{ .tag = .lea, .adjust = adjust },
.base = base,
.imm = disp,
};
}
fn lead(base: Ref.Sized, disp: i32) Select.Operand { fn lead(base: Ref.Sized, disp: i32) Select.Operand {
return .{ return .{
.flags = .{ .tag = .lea }, .flags = .{ .tag = .lea },
@ -94226,6 +94605,7 @@ const Select = struct {
.ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8), .ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8),
.ptr_bit_size => s.cg.target.ptrBitWidth(), .ptr_bit_size => s.cg.target.ptrBitWidth(),
.size => @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu)), .size => @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu)),
.src0_size => @intCast(Select.Operand.Ref.src0.typeOf(s).abiSize(s.cg.pt.zcu)),
.delta_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu))) - .delta_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu))) -
@as(SignedImm, @intCast(op.index.ref.typeOf(s).abiSize(s.cg.pt.zcu)))), @as(SignedImm, @intCast(op.index.ref.typeOf(s).abiSize(s.cg.pt.zcu)))),
.delta_elem_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))) - .delta_elem_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))) -

View File

@ -88,13 +88,32 @@ pub fn emitMir(emit: *Emit) Error!void {
lowered_relocs[0].lowered_inst_index == lowered_index) : ({ lowered_relocs[0].lowered_inst_index == lowered_index) : ({
lowered_relocs = lowered_relocs[1..]; lowered_relocs = lowered_relocs[1..];
}) switch (lowered_relocs[0].target) { }) switch (lowered_relocs[0].target) {
.inst => |target| try relocs.append(emit.lower.allocator, .{ .inst => |target| {
.source = start_offset, const inst_length: u4 = @intCast(end_offset - start_offset);
.source_offset = end_offset - 4, const reloc_offset, const reloc_length = reloc_offset_length: {
.target = target, var reloc_offset = inst_length;
.target_offset = lowered_relocs[0].off, var op_index: usize = lowered_inst.ops.len;
.length = @intCast(end_offset - start_offset), while (true) {
}), op_index -= 1;
const op = lowered_inst.encoding.data.ops[op_index];
if (op == .none) continue;
const enc_length: u4 = @intCast(
std.math.divCeil(u7, @intCast(op.immBitSize()), 8) catch unreachable,
);
reloc_offset -= enc_length;
if (op_index == lowered_relocs[0].op_index)
break :reloc_offset_length .{ reloc_offset, enc_length };
}
};
try relocs.append(emit.lower.allocator, .{
.inst_offset = start_offset,
.inst_length = inst_length,
.source_offset = reloc_offset,
.source_length = reloc_length,
.target = target,
.target_offset = lowered_relocs[0].off,
});
},
.table => try table_relocs.append(emit.lower.allocator, .{ .table => try table_relocs.append(emit.lower.allocator, .{
.source_offset = end_offset - 4, .source_offset = end_offset - 4,
.target_offset = lowered_relocs[0].off, .target_offset = lowered_relocs[0].off,
@ -409,7 +428,7 @@ pub fn emitMir(emit: *Emit) Error!void {
} } }; } } };
}, },
.pseudo_dbg_local_am => loc: { .pseudo_dbg_local_am => loc: {
const mem = emit.lower.mem(mir_inst.data.ax.payload); const mem = emit.lower.mem(undefined, mir_inst.data.ax.payload);
break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{ break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{
base: { base: {
loc_buf[0] = switch (mem.base()) { loc_buf[0] = switch (mem.base()) {
@ -466,15 +485,18 @@ pub fn emitMir(emit: *Emit) Error!void {
} }
} }
} }
{ for (relocs.items) |reloc| {
// TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size. const target = code_offset_mapping[reloc.target];
// This should be reversed like it is done in aarch64 MIR emit code: start with the smallest const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.inst_offset + reloc.inst_length)) + reloc.target_offset;
// possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution const inst_bytes = emit.code.items[reloc.inst_offset..][0..reloc.inst_length];
// until the entire decl is correctly emitted with all JMP/CALL instructions within range. switch (reloc.source_length) {
for (relocs.items) |reloc| { else => unreachable,
const target = code_offset_mapping[reloc.target]; inline 1, 4 => |source_length| std.mem.writeInt(
const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.source + reloc.length)) + reloc.target_offset; @Type(.{ .int = .{ .signedness = .signed, .bits = @as(u16, 8) * source_length } }),
std.mem.writeInt(i32, emit.code.items[reloc.source_offset..][0..4], @intCast(disp), .little); inst_bytes[reloc.source_offset..][0..source_length],
@intCast(disp),
.little,
),
} }
} }
if (emit.lower.mir.table.len > 0) { if (emit.lower.mir.table.len > 0) {
@ -511,15 +533,17 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) Error {
const Reloc = struct { const Reloc = struct {
/// Offset of the instruction. /// Offset of the instruction.
source: u32, inst_offset: u32,
/// Length of the instruction.
inst_length: u4,
/// Offset of the relocation within the instruction. /// Offset of the relocation within the instruction.
source_offset: u32, source_offset: u4,
/// Length of the relocation.
source_length: u4,
/// Target of the relocation. /// Target of the relocation.
target: Mir.Inst.Index, target: Mir.Inst.Index,
/// Offset from the target instruction. /// Offset from the target.
target_offset: i32, target_offset: i32,
/// Length of the instruction.
length: u5,
}; };
const TableReloc = struct { const TableReloc = struct {

View File

@ -304,20 +304,20 @@ pub const Mnemonic = enum {
jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz, jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz,
lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne, lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne,
lods, lodsb, lodsd, lodsq, lodsw, lods, lodsb, lodsd, lodsq, lodsw,
lsl, ltr, lzcnt, lsl, ltr,
mfence, mov, movbe, mfence, mov, movbe,
movs, movsb, movsd, movsq, movsw, movs, movsb, movsd, movsq, movsw,
movsx, movsxd, movzx, mul, movsx, movsxd, movzx, mul,
neg, nop, not, neg, nop, not,
@"or", out, outs, outsb, outsd, outsw, @"or", out, outs, outsb, outsd, outsw,
pause, pop, popcnt, popf, popfd, popfq, push, pushfq, pause, pop, popf, popfd, popfq, push, pushfq,
rcl, rcr, rcl, rcr,
rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp, rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp,
ret, rol, ror, rorx, rsm, ret, rol, ror, rsm,
sahf, sal, sar, sarx, sbb, sahf, sal, sar, sbb,
scas, scasb, scasd, scasq, scasw, scas, scasb, scasd, scasq, scasw,
senduipi, serialize, senduipi, serialize,
shl, shld, shlx, shr, shrd, shrx, shl, shld, shr, shrd,
stac, stc, std, sti, str, stui, stac, stc, std, sti, str, stui,
sub, swapgs, syscall, sysenter, sysexit, sysret, sub, swapgs, syscall, sysenter, sysexit, sysret,
seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae, seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
@ -433,6 +433,8 @@ pub const Mnemonic = enum {
roundpd, roundps, roundsd, roundss, roundpd, roundps, roundsd, roundss,
// SSE4.2 // SSE4.2
crc32, pcmpgtq, crc32, pcmpgtq,
// ABM
lzcnt, popcnt,
// PCLMUL // PCLMUL
pclmulqdq, pclmulqdq,
// AES // AES
@ -440,7 +442,6 @@ pub const Mnemonic = enum {
// SHA // SHA
sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2, sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2,
// AVX // AVX
andn, bextr, blsi, blsmsk, blsr, bzhi, tzcnt,
vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps, vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps,
vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist, vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist,
vandnpd, vandnps, vandpd, vandps, vandnpd, vandnps, vandpd, vandps,
@ -506,6 +507,10 @@ pub const Mnemonic = enum {
vtestpd, vtestps, vtestpd, vtestps,
vucomisd, vucomiss, vunpckhpd, vunpckhps, vunpcklpd, vunpcklps, vucomisd, vucomiss, vunpckhpd, vunpckhps, vunpcklpd, vunpcklps,
vxorpd, vxorps, vxorpd, vxorps,
// BMI
andn, bextr, blsi, blsmsk, blsr, tzcnt,
// BMI2
bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx,
// F16C // F16C
vcvtph2ps, vcvtps2ph, vcvtph2ps, vcvtps2ph,
// FMA // FMA

View File

@ -10,32 +10,38 @@ mir: Mir,
cc: std.builtin.CallingConvention, cc: std.builtin.CallingConvention,
err_msg: ?*Zcu.ErrorMsg = null, err_msg: ?*Zcu.ErrorMsg = null,
src_loc: Zcu.LazySrcLoc, src_loc: Zcu.LazySrcLoc,
result_insts_len: u8 = undefined, result_insts_len: ResultInstIndex = undefined,
result_relocs_len: u8 = undefined, result_insts: [max_result_insts]Instruction = undefined,
result_insts: [ result_relocs_len: ResultRelocIndex = undefined,
@max( result_relocs: [max_result_relocs]Reloc = undefined,
1, // non-pseudo instructions
3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode const max_result_insts = @max(
2, // cmovcc: cmovcc \ cmovcc 1, // non-pseudo instructions
3, // setcc: setcc \ setcc \ logicop 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
2, // jcc: jcc \ jcc 2, // cmovcc: cmovcc \ cmovcc
pseudo_probe_align_insts, 3, // setcc: setcc \ setcc \ logicop
pseudo_probe_adjust_unrolled_max_insts, 2, // jcc: jcc \ jcc
pseudo_probe_adjust_setup_insts, pseudo_probe_align_insts,
pseudo_probe_adjust_loop_insts, pseudo_probe_adjust_unrolled_max_insts,
abi.Win64.callee_preserved_regs.len * 2, // push_regs/pop_regs pseudo_probe_adjust_setup_insts,
abi.SysV.callee_preserved_regs.len * 2, // push_regs/pop_regs pseudo_probe_adjust_loop_insts,
) abi.Win64.callee_preserved_regs.len * 2, // push_regs/pop_regs
]Instruction = undefined, abi.SysV.callee_preserved_regs.len * 2, // push_regs/pop_regs
result_relocs: [ );
@max( const max_result_relocs = @max(
1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
2, // jcc: jcc \ jcc 2, // jcc: jcc \ jcc
2, // test \ jcc \ probe \ sub \ jmp 2, // test \ jcc \ probe \ sub \ jmp
1, // probe \ sub \ jcc 1, // probe \ sub \ jcc
3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode 3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
) );
]Reloc = undefined,
const ResultInstIndex = std.math.IntFittingRange(0, max_result_insts - 1);
const ResultRelocIndex = std.math.IntFittingRange(0, max_result_relocs - 1);
const InstOpIndex = std.math.IntFittingRange(
0,
@typeInfo(@FieldType(Instruction, "ops")).array.len - 1,
);
pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp
pub const pseudo_probe_adjust_unrolled_max_insts = pub const pseudo_probe_adjust_unrolled_max_insts =
@ -51,7 +57,8 @@ pub const Error = error{
}; };
pub const Reloc = struct { pub const Reloc = struct {
lowered_inst_index: u8, lowered_inst_index: ResultInstIndex,
op_index: InstOpIndex,
target: Target, target: Target,
off: i32, off: i32,
@ -114,11 +121,11 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
assert(inst.data.rx.fixes == ._); assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .cmovnz, &.{ try lower.emit(.none, .cmovnz, &.{
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
.{ .mem = lower.mem(inst.data.rx.payload) }, .{ .mem = lower.mem(1, inst.data.rx.payload) },
}); });
try lower.emit(.none, .cmovp, &.{ try lower.emit(.none, .cmovp, &.{
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
.{ .mem = lower.mem(inst.data.rx.payload) }, .{ .mem = lower.mem(1, inst.data.rx.payload) },
}); });
}, },
.pseudo_set_z_and_np_r => { .pseudo_set_z_and_np_r => {
@ -137,13 +144,13 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_set_z_and_np_m => { .pseudo_set_z_and_np_m => {
assert(inst.data.rx.fixes == ._); assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setz, &.{ try lower.emit(.none, .setz, &.{
.{ .mem = lower.mem(inst.data.rx.payload) }, .{ .mem = lower.mem(0, inst.data.rx.payload) },
}); });
try lower.emit(.none, .setnp, &.{ try lower.emit(.none, .setnp, &.{
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
}); });
try lower.emit(.none, .@"and", &.{ try lower.emit(.none, .@"and", &.{
.{ .mem = lower.mem(inst.data.rx.payload) }, .{ .mem = lower.mem(0, inst.data.rx.payload) },
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
}); });
}, },
@ -163,32 +170,32 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.pseudo_set_nz_or_p_m => { .pseudo_set_nz_or_p_m => {
assert(inst.data.rx.fixes == ._); assert(inst.data.rx.fixes == ._);
try lower.emit(.none, .setnz, &.{ try lower.emit(.none, .setnz, &.{
.{ .mem = lower.mem(inst.data.rx.payload) }, .{ .mem = lower.mem(0, inst.data.rx.payload) },
}); });
try lower.emit(.none, .setp, &.{ try lower.emit(.none, .setp, &.{
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
}); });
try lower.emit(.none, .@"or", &.{ try lower.emit(.none, .@"or", &.{
.{ .mem = lower.mem(inst.data.rx.payload) }, .{ .mem = lower.mem(0, inst.data.rx.payload) },
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
}); });
}, },
.pseudo_j_z_and_np_inst => { .pseudo_j_z_and_np_inst => {
assert(inst.data.inst.fixes == ._); assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{ try lower.emit(.none, .jnz, &.{
.{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) }, .{ .imm = lower.reloc(0, .{ .inst = index + 1 }, 0) },
}); });
try lower.emit(.none, .jnp, &.{ try lower.emit(.none, .jnp, &.{
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) }, .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
}); });
}, },
.pseudo_j_nz_or_p_inst => { .pseudo_j_nz_or_p_inst => {
assert(inst.data.inst.fixes == ._); assert(inst.data.inst.fixes == ._);
try lower.emit(.none, .jnz, &.{ try lower.emit(.none, .jnz, &.{
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) }, .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
}); });
try lower.emit(.none, .jp, &.{ try lower.emit(.none, .jp, &.{
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) }, .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
}); });
}, },
@ -198,7 +205,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.{ .imm = .s(@bitCast(inst.data.ri.i)) }, .{ .imm = .s(@bitCast(inst.data.ri.i)) },
}); });
try lower.emit(.none, .jz, &.{ try lower.emit(.none, .jz, &.{
.{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) }, .{ .imm = lower.reloc(0, .{ .inst = index + 1 }, 0) },
}); });
try lower.emit(.none, .lea, &.{ try lower.emit(.none, .lea, &.{
.{ .reg = inst.data.ri.r1 }, .{ .reg = inst.data.ri.r1 },
@ -214,7 +221,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.{ .reg = inst.data.ri.r1.to32() }, .{ .reg = inst.data.ri.r1.to32() },
}); });
try lower.emit(.none, .jmp, &.{ try lower.emit(.none, .jmp, &.{
.{ .imm = lower.reloc(.{ .inst = index }, 0) }, .{ .imm = lower.reloc(0, .{ .inst = index }, 0) },
}); });
assert(lower.result_insts_len == pseudo_probe_align_insts); assert(lower.result_insts_len == pseudo_probe_align_insts);
}, },
@ -260,7 +267,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
.{ .imm = .s(page_size) }, .{ .imm = .s(page_size) },
}); });
try lower.emit(.none, .jae, &.{ try lower.emit(.none, .jae, &.{
.{ .imm = lower.reloc(.{ .inst = index }, 0) }, .{ .imm = lower.reloc(0, .{ .inst = index }, 0) },
}); });
assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts); assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts);
}, },
@ -382,21 +389,22 @@ pub fn imm(lower: *const Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
}; };
} }
pub fn mem(lower: *Lower, payload: u32) Memory { pub fn mem(lower: *Lower, op_index: InstOpIndex, payload: u32) Memory {
var m = lower.mir.resolveFrameLoc(lower.mir.extraData(Mir.Memory, payload).data).decode(); var m = lower.mir.resolveFrameLoc(lower.mir.extraData(Mir.Memory, payload).data).decode();
switch (m) { switch (m) {
.sib => |*sib| switch (sib.base) { .sib => |*sib| switch (sib.base) {
else => {}, else => {},
.table => sib.disp = lower.reloc(.table, sib.disp).signed, .table => sib.disp = lower.reloc(op_index, .table, sib.disp).signed,
}, },
else => {}, else => {},
} }
return m; return m;
} }
fn reloc(lower: *Lower, target: Reloc.Target, off: i32) Immediate { fn reloc(lower: *Lower, op_index: InstOpIndex, target: Reloc.Target, off: i32) Immediate {
lower.result_relocs[lower.result_relocs_len] = .{ lower.result_relocs[lower.result_relocs_len] = .{
.lowered_inst_index = lower.result_insts_len, .lowered_inst_index = lower.result_insts_len,
.op_index = op_index,
.target = target, .target = target,
.off = off, .off = off,
}; };
@ -409,7 +417,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
var emit_mnemonic = mnemonic; var emit_mnemonic = mnemonic;
var emit_ops_storage: [4]Operand = undefined; var emit_ops_storage: [4]Operand = undefined;
const emit_ops = emit_ops_storage[0..ops.len]; const emit_ops = emit_ops_storage[0..ops.len];
for (emit_ops, ops) |*emit_op, op| { for (emit_ops, ops, 0..) |*emit_op, op, op_index| {
emit_op.* = switch (op) { emit_op.* = switch (op) {
else => op, else => op,
.mem => |mem_op| switch (mem_op.base()) { .mem => |mem_op| switch (mem_op.base()) {
@ -428,20 +436,20 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
if (lower.pic) { if (lower.pic) {
// Here, we currently assume local dynamic TLS vars, and so // Here, we currently assume local dynamic TLS vars, and so
// we emit LD model. // we emit LD model.
_ = lower.reloc(.{ .linker_tlsld = sym_index }, 0); _ = lower.reloc(1, .{ .linker_tlsld = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{ lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{
.{ .reg = .rdi }, .{ .reg = .rdi },
.{ .mem = Memory.initRip(.none, 0) }, .{ .mem = Memory.initRip(.none, 0) },
}, lower.target); }, lower.target);
lower.result_insts_len += 1; lower.result_insts_len += 1;
_ = lower.reloc(.{ _ = lower.reloc(0, .{
.linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null), .linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null),
}, 0); }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{ lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{
.{ .imm = .s(0) }, .{ .imm = .s(0) },
}, lower.target); }, lower.target);
lower.result_insts_len += 1; lower.result_insts_len += 1;
_ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0); _ = lower.reloc(@intCast(op_index), .{ .linker_dtpoff = sym_index }, 0);
emit_mnemonic = .lea; emit_mnemonic = .lea;
break :op .{ .mem = Memory.initSib(.none, .{ break :op .{ .mem = Memory.initSib(.none, .{
.base = .{ .reg = .rax }, .base = .{ .reg = .rax },
@ -454,7 +462,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
.{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) }, .{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) },
}, lower.target); }, lower.target);
lower.result_insts_len += 1; lower.result_insts_len += 1;
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0); _ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
emit_mnemonic = .lea; emit_mnemonic = .lea;
break :op .{ .mem = Memory.initSib(.none, .{ break :op .{ .mem = Memory.initSib(.none, .{
.base = .{ .reg = .rax }, .base = .{ .reg = .rax },
@ -463,15 +471,17 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
} }
} }
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
if (lower.pic) switch (mnemonic) { if (lower.pic) switch (mnemonic) {
.lea => if (elf_sym.flags.is_extern_ptr) { .lea => {
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
if (!elf_sym.flags.is_extern_ptr) break :op .{ .mem = Memory.initRip(.none, 0) };
emit_mnemonic = .mov; emit_mnemonic = .mov;
break :op .{ .mem = Memory.initRip(.ptr, 0) }; break :op .{ .mem = Memory.initRip(.ptr, 0) };
} else break :op .{ .mem = Memory.initRip(.none, 0) }, },
.mov => { .mov => {
if (elf_sym.flags.is_extern_ptr) { if (elf_sym.flags.is_extern_ptr) {
const reg = ops[0].reg; const reg = ops[0].reg;
_ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = reg.to64() }, .{ .reg = reg.to64() },
.{ .mem = Memory.initRip(.qword, 0) }, .{ .mem = Memory.initRip(.qword, 0) },
@ -481,10 +491,13 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
.reg = reg.to64(), .reg = reg.to64(),
} }) }; } }) };
} }
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }; break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
}, },
else => unreachable, else => unreachable,
} else switch (mnemonic) { };
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
switch (mnemonic) {
.call => break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ .call => break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
.base = .{ .reg = .ds }, .base = .{ .reg = .ds },
}) }, }) },
@ -502,7 +515,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
const macho_sym = zo.symbols.items[sym_index]; const macho_sym = zo.symbols.items[sym_index];
if (macho_sym.flags.tlv) { if (macho_sym.flags.tlv) {
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0); _ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = .rdi }, .{ .reg = .rdi },
.{ .mem = Memory.initRip(.ptr, 0) }, .{ .mem = Memory.initRip(.ptr, 0) },
@ -516,15 +529,17 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
break :op .{ .reg = .rax }; break :op .{ .reg = .rax };
} }
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
break :op switch (mnemonic) { break :op switch (mnemonic) {
.lea => if (macho_sym.flags.is_extern_ptr) { .lea => {
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
if (!macho_sym.flags.is_extern_ptr) break :op .{ .mem = Memory.initRip(.none, 0) };
emit_mnemonic = .mov; emit_mnemonic = .mov;
break :op .{ .mem = Memory.initRip(.ptr, 0) }; break :op .{ .mem = Memory.initRip(.ptr, 0) };
} else break :op .{ .mem = Memory.initRip(.none, 0) }, },
.mov => { .mov => {
if (macho_sym.flags.is_extern_ptr) { if (macho_sym.flags.is_extern_ptr) {
const reg = ops[0].reg; const reg = ops[0].reg;
_ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
.{ .reg = reg.to64() }, .{ .reg = reg.to64() },
.{ .mem = Memory.initRip(.qword, 0) }, .{ .mem = Memory.initRip(.qword, 0) },
@ -534,6 +549,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
.reg = reg.to64(), .reg = reg.to64(),
} }) }; } }) };
} }
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }; break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
}, },
else => unreachable, else => unreachable,
@ -550,7 +566,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
} }
fn generic(lower: *Lower, inst: Mir.Inst) Error!void { fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
@setEvalBranchQuota(2_400); @setEvalBranchQuota(2_500);
const fixes = switch (inst.ops) { const fixes = switch (inst.ops) {
.none => inst.data.none.fixes, .none => inst.data.none.fixes,
.inst => inst.data.inst.fixes, .inst => inst.data.inst.fixes,
@ -595,7 +611,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
}, switch (inst.ops) { }, switch (inst.ops) {
.none => &.{}, .none => &.{},
.inst => &.{ .inst => &.{
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) }, .{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
}, },
.i_s, .i_u => &.{ .i_s, .i_u => &.{
.{ .imm = lower.imm(inst.ops, inst.data.i.i) }, .{ .imm = lower.imm(inst.ops, inst.data.i.i) },
@ -642,10 +658,10 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.{ .imm = lower.imm(inst.ops, inst.data.rri.i) }, .{ .imm = lower.imm(inst.ops, inst.data.rri.i) },
}, },
.m => &.{ .m => &.{
.{ .mem = lower.mem(inst.data.x.payload) }, .{ .mem = lower.mem(0, inst.data.x.payload) },
}, },
.mi_s, .mi_u => &.{ .mi_s, .mi_u => &.{
.{ .mem = lower.mem(inst.data.x.payload + 1) }, .{ .mem = lower.mem(0, inst.data.x.payload + 1) },
.{ .imm = lower.imm( .{ .imm = lower.imm(
inst.ops, inst.ops,
lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm, lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm,
@ -653,64 +669,64 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
}, },
.rm => &.{ .rm => &.{
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
.{ .mem = lower.mem(inst.data.rx.payload) }, .{ .mem = lower.mem(1, inst.data.rx.payload) },
}, },
.rmr => &.{ .rmr => &.{
.{ .reg = inst.data.rrx.r1 }, .{ .reg = inst.data.rrx.r1 },
.{ .mem = lower.mem(inst.data.rrx.payload) }, .{ .mem = lower.mem(1, inst.data.rrx.payload) },
.{ .reg = inst.data.rrx.r2 }, .{ .reg = inst.data.rrx.r2 },
}, },
.rmi => &.{ .rmi => &.{
.{ .reg = inst.data.rix.r1 }, .{ .reg = inst.data.rix.r1 },
.{ .mem = lower.mem(inst.data.rix.payload) }, .{ .mem = lower.mem(1, inst.data.rix.payload) },
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
}, },
.rmi_s, .rmi_u => &.{ .rmi_s, .rmi_u => &.{
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
.{ .mem = lower.mem(inst.data.rx.payload + 1) }, .{ .mem = lower.mem(1, inst.data.rx.payload + 1) },
.{ .imm = lower.imm( .{ .imm = lower.imm(
inst.ops, inst.ops,
lower.mir.extraData(Mir.Imm32, inst.data.rx.payload).data.imm, lower.mir.extraData(Mir.Imm32, inst.data.rx.payload).data.imm,
) }, ) },
}, },
.mr => &.{ .mr => &.{
.{ .mem = lower.mem(inst.data.rx.payload) }, .{ .mem = lower.mem(0, inst.data.rx.payload) },
.{ .reg = inst.data.rx.r1 }, .{ .reg = inst.data.rx.r1 },
}, },
.mrr => &.{ .mrr => &.{
.{ .mem = lower.mem(inst.data.rrx.payload) }, .{ .mem = lower.mem(0, inst.data.rrx.payload) },
.{ .reg = inst.data.rrx.r1 }, .{ .reg = inst.data.rrx.r1 },
.{ .reg = inst.data.rrx.r2 }, .{ .reg = inst.data.rrx.r2 },
}, },
.mri => &.{ .mri => &.{
.{ .mem = lower.mem(inst.data.rix.payload) }, .{ .mem = lower.mem(0, inst.data.rix.payload) },
.{ .reg = inst.data.rix.r1 }, .{ .reg = inst.data.rix.r1 },
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
}, },
.rrm => &.{ .rrm => &.{
.{ .reg = inst.data.rrx.r1 }, .{ .reg = inst.data.rrx.r1 },
.{ .reg = inst.data.rrx.r2 }, .{ .reg = inst.data.rrx.r2 },
.{ .mem = lower.mem(inst.data.rrx.payload) }, .{ .mem = lower.mem(2, inst.data.rrx.payload) },
}, },
.rrmr => &.{ .rrmr => &.{
.{ .reg = inst.data.rrrx.r1 }, .{ .reg = inst.data.rrrx.r1 },
.{ .reg = inst.data.rrrx.r2 }, .{ .reg = inst.data.rrrx.r2 },
.{ .mem = lower.mem(inst.data.rrrx.payload) }, .{ .mem = lower.mem(2, inst.data.rrrx.payload) },
.{ .reg = inst.data.rrrx.r3 }, .{ .reg = inst.data.rrrx.r3 },
}, },
.rrmi => &.{ .rrmi => &.{
.{ .reg = inst.data.rrix.r1 }, .{ .reg = inst.data.rrix.r1 },
.{ .reg = inst.data.rrix.r2 }, .{ .reg = inst.data.rrix.r2 },
.{ .mem = lower.mem(inst.data.rrix.payload) }, .{ .mem = lower.mem(2, inst.data.rrix.payload) },
.{ .imm = lower.imm(inst.ops, inst.data.rrix.i) }, .{ .imm = lower.imm(inst.ops, inst.data.rrix.i) },
}, },
.extern_fn_reloc, .rel => &.{ .extern_fn_reloc, .rel => &.{
.{ .imm = lower.reloc(.{ .linker_extern_fn = inst.data.reloc.sym_index }, inst.data.reloc.off) }, .{ .imm = lower.reloc(0, .{ .linker_extern_fn = inst.data.reloc.sym_index }, inst.data.reloc.off) },
}, },
.got_reloc, .direct_reloc, .import_reloc => ops: { .got_reloc, .direct_reloc, .import_reloc => ops: {
const reg = inst.data.rx.r1; const reg = inst.data.rx.r1;
const extra = lower.mir.extraData(bits.SymbolOffset, inst.data.rx.payload).data; const extra = lower.mir.extraData(bits.SymbolOffset, inst.data.rx.payload).data;
_ = lower.reloc(switch (inst.ops) { _ = lower.reloc(1, switch (inst.ops) {
.got_reloc => .{ .linker_got = extra.sym_index }, .got_reloc => .{ .linker_got = extra.sym_index },
.direct_reloc => .{ .linker_direct = extra.sym_index }, .direct_reloc => .{ .linker_direct = extra.sym_index },
.import_reloc => .{ .linker_import = extra.sym_index }, .import_reloc => .{ .linker_import = extra.sym_index },

View File

@ -100,6 +100,8 @@ pub const Inst = struct {
/// ___ Division /// ___ Division
_d, _d,
/// ___ Without Affecting Flags
_x,
/// ___ Left /// ___ Left
_l, _l,
/// ___ Left Double /// ___ Left Double
@ -483,6 +485,7 @@ pub const Inst = struct {
/// ASCII adjust al after subtraction /// ASCII adjust al after subtraction
aa, aa,
/// Add with carry /// Add with carry
/// Unsigned integer addition of two operands with carry flag
adc, adc,
/// Add /// Add
/// Add packed integers /// Add packed integers
@ -1162,10 +1165,8 @@ pub const Inst = struct {
fmadd231, fmadd231,
// ADX // ADX
/// Unsigned integer addition of two operands with carry flag
adcx,
/// Unsigned integer addition of two operands with overflow flag /// Unsigned integer addition of two operands with overflow flag
adox, ado,
// AESKLE // AESKLE
/// Encode 128-bit key with key locker /// Encode 128-bit key with key locker

View File

@ -405,9 +405,9 @@ pub const table = [_]Entry{
.{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
.{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none },
.{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
.{ .jcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .short, .@"32bit" }, .{ .jcxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .short, .@"32bit" },
.{ .jecxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"32bit" }, .{ .jecxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .none, .@"32bit" },
.{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"64bit" }, .{ .jrcxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .none, .@"64bit" },
.{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none },
.{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none },
.{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none },
@ -477,10 +477,6 @@ pub const table = [_]Entry{
.{ .ltr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 3, .none, .none }, .{ .ltr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 3, .none, .none },
.{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
.{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt },
.{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt },
.{ .mfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, .{ .mfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none },
.{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none }, .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none },
@ -630,10 +626,6 @@ pub const table = [_]Entry{
.{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none }, .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none },
.{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none }, .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none },
.{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt },
.{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt },
.{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt },
.{ .popf, .z, &.{}, &.{ 0x9d }, 0, .short, .none }, .{ .popf, .z, &.{}, &.{ 0x9d }, 0, .short, .none },
.{ .popfd, .z, &.{}, &.{ 0x9d }, 0, .none, .@"32bit" }, .{ .popfd, .z, &.{}, &.{ 0x9d }, 0, .none, .@"32bit" },
.{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .@"64bit" }, .{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .@"64bit" },
@ -1738,6 +1730,15 @@ pub const table = [_]Entry{
.{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 }, .{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 },
// ABM
.{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
.{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt },
.{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt },
.{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt },
.{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt },
.{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt },
// PCLMUL // PCLMUL
.{ .pclmulqdq, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .none, .pclmul }, .{ .pclmulqdq, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .none, .pclmul },
@ -1771,38 +1772,6 @@ pub const table = [_]Entry{
.{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha }, .{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha },
// AVX // AVX
.{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi },
.{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi },
.{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi },
.{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi },
.{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi },
.{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi },
.{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi },
.{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi },
.{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi },
.{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi },
.{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
.{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
.{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 },
.{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 },
.{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
.{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
.{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
.{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
.{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
.{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
.{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
.{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi },
.{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi },
.{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
.{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
@ -2307,6 +2276,49 @@ pub const table = [_]Entry{
.{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx }, .{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
.{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx }, .{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
// BMI
.{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi },
.{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi },
.{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi },
.{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi },
.{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi },
.{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi },
.{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi },
.{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi },
.{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi },
.{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi },
.{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
.{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi },
.{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi },
// BMI2
.{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
.{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
.{ .mulx, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf6 }, 0, .vex_lz_w0, .bmi2 },
.{ .mulx, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf6 }, 0, .vex_lz_w1, .bmi2 },
.{ .pdep, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
.{ .pdep, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
.{ .pext, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
.{ .pext, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
.{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 },
.{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 },
.{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
.{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
.{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
.{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
.{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
.{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
// F16C // F16C
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c }, .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c }, .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },

View File

@ -93,6 +93,11 @@ pub fn build(b: *std.Build) void {
.cpu_arch = .x86_64, .cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 }, .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
}, },
.{
.cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
.cpu_features_add = std.Target.x86.featureSet(&.{.adx}),
},
.{ .{
.cpu_arch = .x86_64, .cpu_arch = .x86_64,
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 }, .cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 },

View File

@ -44,6 +44,17 @@ fn AddOneBit(comptime Type: type) type {
.vector => |vector| @Vector(vector.len, ResultScalar), .vector => |vector| @Vector(vector.len, ResultScalar),
}; };
} }
/// Maps `Type` to a type of the same shape whose scalar, as reported by
/// `Scalar(Type)`, is widened: an integer scalar keeps its signedness and
/// gets twice as many bits, a float scalar is returned unchanged, and any
/// other scalar kind is a compile error naming `Type`.
/// A vector `Type` yields a vector of the same length over the widened scalar.
fn DoubleBits(comptime Type: type) type {
    const Elem = Scalar(Type);
    const WideElem = switch (@typeInfo(Elem)) {
        .int => |info| @Type(.{ .int = .{
            .signedness = info.signedness,
            .bits = info.bits * 2,
        } }),
        .float => Elem,
        else => @compileError(@typeName(Type)),
    };
    return switch (@typeInfo(Type)) {
        .vector => |info| @Vector(info.len, WideElem),
        else => WideElem,
    };
}
// inline to avoid a runtime `@splat` // inline to avoid a runtime `@splat`
inline fn splat(comptime Type: type, scalar: Scalar(Type)) Type { inline fn splat(comptime Type: type, scalar: Scalar(Type)) Type {
return switch (@typeInfo(Type)) { return switch (@typeInfo(Type)) {
@ -16216,6 +16227,8 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
); );
} }
fn testInts() !void { fn testInts() !void {
try testArgs(i4, 0x3, 0x2);
try testArgs(u4, 0xe, 0x6);
try testArgs(i8, 0x48, 0x6c); try testArgs(i8, 0x48, 0x6c);
try testArgs(u8, 0xbb, 0x43); try testArgs(u8, 0xbb, 0x43);
try testArgs(i16, -0x0fdf, 0x302e); try testArgs(i16, -0x0fdf, 0x302e);
@ -18993,6 +19006,15 @@ test subUnsafe {
try test_sub_unsafe.testFloatVectors(); try test_sub_unsafe.testFloatVectors();
} }
/// Multiplies `lhs` by `rhs` with runtime safety disabled, after coercing
/// `lhs` to `DoubleBits(Type)`; the product is returned in that widened type.
inline fn mulUnsafe(comptime Type: type, lhs: Type, rhs: Type) DoubleBits(Type) {
    @setRuntimeSafety(false);
    const Wide = DoubleBits(Type);
    // Only the left operand is widened explicitly; `rhs` is widened by the
    // multiplication's peer type resolution.
    const wide_lhs: Wide = lhs;
    return wide_lhs * rhs;
}
// Runs `mulUnsafe` through the `binary` test harness with default options.
// Only the integer cases (`testInts`) are exercised here.
test mulUnsafe {
const test_mul_unsafe = binary(mulUnsafe, .{});
try test_mul_unsafe.testInts();
}
inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs * rhs) { inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs * rhs) {
if (@inComptime() and @typeInfo(Type) == .vector) { if (@inComptime() and @typeInfo(Type) == .vector) {
// workaround https://github.com/ziglang/zig/issues/22743 // workaround https://github.com/ziglang/zig/issues/22743