mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 06:13:07 +00:00
x86_64: rewrite unsafe scalar int multiplication
This commit is contained in:
parent
5db585fcde
commit
dcc9fe322e
@ -6036,10 +6036,377 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
|||||||
.sub_safe => unreachable,
|
.sub_safe => unreachable,
|
||||||
.mul, .mul_optimized => |air_tag| if (use_old) try cg.airMulDivBinOp(inst, .mul) else fallback: {
|
.mul, .mul_optimized => |air_tag| if (use_old) try cg.airMulDivBinOp(inst, .mul) else fallback: {
|
||||||
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
|
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
|
||||||
if (cg.floatBits(cg.typeOf(bin_op.lhs).scalarType(zcu)) == null) break :fallback try cg.airMulDivBinOp(inst, .mul);
|
const ty = cg.typeOf(bin_op.lhs);
|
||||||
|
if (ty.isVector(zcu) and cg.floatBits(ty.childType(zcu)) == null) break :fallback try cg.airMulDivBinOp(inst, .mul);
|
||||||
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
|
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
|
||||||
var res: [1]Temp = undefined;
|
var res: [1]Temp = undefined;
|
||||||
cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{
|
cg.select(&res, &.{ty}, &ops, comptime &.{ .{
|
||||||
|
.src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
|
||||||
|
.{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
|
||||||
|
.{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .{ .ref = .src0 }, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, .i_, .mul, .src1b, ._, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .{ .to_reg = .al }, .mem, .none } },
|
||||||
|
.{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } },
|
||||||
|
.{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } },
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .{ .ref = .src0 }, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, ._, .mul, .src1b, ._, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .mem, .imm16, .none } },
|
||||||
|
.{ .src = .{ .imm16, .mem, .none }, .commute = .{ 0, 1 } },
|
||||||
|
.{ .src = .{ .to_gpr, .imm16, .none } },
|
||||||
|
.{ .src = .{ .imm16, .to_gpr, .none }, .commute = .{ 0, 1 } },
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, .i_, .mul, .dst0w, .src0w, .src1w, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.src_constraints = .{ .{ .int = .word }, .{ .int = .word }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mut_gpr, .mem, .none } },
|
||||||
|
.{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
|
||||||
|
.{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .{ .ref = .src0 }, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, .i_, .mul, .dst0w, .src1w, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .mem, .imm32, .none } },
|
||||||
|
.{ .src = .{ .imm32, .mem, .none }, .commute = .{ 0, 1 } },
|
||||||
|
.{ .src = .{ .to_gpr, .imm32, .none } },
|
||||||
|
.{ .src = .{ .imm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, .i_, .mul, .dst0d, .src0d, .src1d, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.src_constraints = .{ .{ .int = .dword }, .{ .int = .dword }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mut_gpr, .mem, .none } },
|
||||||
|
.{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
|
||||||
|
.{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .{ .ref = .src0 }, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, .i_, .mul, .dst0d, .src1d, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.required_features = .{ .@"64bit", null, null, null },
|
||||||
|
.src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .mem, .simm32, .none } },
|
||||||
|
.{ .src = .{ .simm32, .mem, .none }, .commute = .{ 0, 1 } },
|
||||||
|
.{ .src = .{ .to_gpr, .simm32, .none } },
|
||||||
|
.{ .src = .{ .simm32, .to_gpr, .none }, .commute = .{ 0, 1 } },
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, .i_, .mul, .dst0q, .src0q, .src1q, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.required_features = .{ .@"64bit", null, null, null },
|
||||||
|
.src_constraints = .{ .{ .int = .qword }, .{ .int = .qword }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mut_gpr, .mem, .none } },
|
||||||
|
.{ .src = .{ .mem, .to_mut_gpr, .none }, .commute = .{ 0, 1 } },
|
||||||
|
.{ .src = .{ .to_mut_gpr, .to_gpr, .none } },
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .{ .ref = .src0 }, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, .i_, .mul, .dst0q, .src1q, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.required_features = .{ .@"64bit", null, null, null },
|
||||||
|
.src_constraints = .{ .{ .int = .xword }, .{ .int = .xword }, .any },
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mem, .to_mem, .none } },
|
||||||
|
},
|
||||||
|
.extra_temps = .{
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rax } },
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rdx } },
|
||||||
|
.unused,
|
||||||
|
.unused,
|
||||||
|
.unused,
|
||||||
|
.unused,
|
||||||
|
.unused,
|
||||||
|
.unused,
|
||||||
|
.unused,
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .mem, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ },
|
||||||
|
.{ ._, ._, .mul, .src1q, ._, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .dst0q, .tmp0q, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .tmp0q, .src0q, ._, ._ },
|
||||||
|
.{ ._, .i_, .mul, .tmp0q, .memd(.src1q, 8), ._, ._ },
|
||||||
|
.{ ._, ._, .add, .tmp1q, .tmp0q, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .tmp0q, .src1q, ._, ._ },
|
||||||
|
.{ ._, .i_, .mul, .tmp0q, .memd(.src0q, 8), ._, ._ },
|
||||||
|
.{ ._, ._, .add, .tmp1q, .tmp0q, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memd(.dst0q, 8), .tmp1q, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.required_features = .{ .@"64bit", .bmi2, .adx, null },
|
||||||
|
.src_constraints = .{
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.any,
|
||||||
|
},
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mem, .to_mem, .none } },
|
||||||
|
},
|
||||||
|
.extra_temps = .{
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rdx } },
|
||||||
|
.{ .type = .isize, .kind = .{ .reg = .rcx } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.unused,
|
||||||
|
.unused,
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .mem, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, ._, .mov, .tmp0d, .sia(-8, .src0, .add_size), ._, ._ },
|
||||||
|
.{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ },
|
||||||
|
.{ .@"0:", ._, .xor, .tmp2d, .tmp2d, ._, ._ },
|
||||||
|
.{ ._, ._, .@"or", .tmp2q, .memi(.src0q, .tmp0), ._, ._ },
|
||||||
|
.{ ._, ._z, .j, .@"2f", ._, ._, ._ },
|
||||||
|
.{ ._, ._, .lea, .tmp3p, .leaad(.tmp0, .sub_src0_size, 8), ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
|
||||||
|
.{ .@"1:", ._x, .mul, .tmp6q, .tmp5q, .leai(.tmp1q, .tmp3), ._ },
|
||||||
|
.{ ._, ._x, .adc, .tmp5q, .tmp4q, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memiad(.dst0q, .tmp3, .add_size, -8), .tmp5q, ._, ._ },
|
||||||
|
.{ ._, ._rcxz, .j, .@"1f", ._, ._, ._ },
|
||||||
|
.{ ._, ._x, .ado, .tmp6q, .memia(.dst0q, .tmp3, .add_size), ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .tmp4q, .tmp6q, ._, ._ },
|
||||||
|
.{ ._, ._, .lea, .tmp3p, .lead(.tmp3, 8), ._, ._ },
|
||||||
|
.{ ._, ._mp, .j, .@"1b", ._, ._, ._ },
|
||||||
|
.{ .@"2:", ._, .mov, .memi(.dst0q, .tmp0), .tmp2q, ._, ._ },
|
||||||
|
.{ .@"1:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
|
||||||
|
.{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ },
|
||||||
|
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.required_features = .{ .@"64bit", .bmi2, .slow_incdec, null },
|
||||||
|
.src_constraints = .{
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.any,
|
||||||
|
},
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mem, .to_mem, .none } },
|
||||||
|
},
|
||||||
|
.extra_temps = .{
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rdx } },
|
||||||
|
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.unused,
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .mem, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
|
||||||
|
.{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
|
||||||
|
.{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
|
||||||
|
.{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
|
||||||
|
.{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
|
||||||
|
.{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
|
||||||
|
.{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
|
||||||
|
.{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
|
||||||
|
.{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
|
||||||
|
.{ .@"2:", ._x, .mul, .tmp7q, .tmp6q, .leasi(.tmp1q, .@"8", .tmp2), ._ },
|
||||||
|
.{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
|
||||||
|
.{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
|
||||||
|
.{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
|
||||||
|
.{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
|
||||||
|
.{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
|
||||||
|
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.required_features = .{ .@"64bit", .bmi2, null, null },
|
||||||
|
.src_constraints = .{
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.any,
|
||||||
|
},
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mem, .to_mem, .none } },
|
||||||
|
},
|
||||||
|
.extra_temps = .{
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rdx } },
|
||||||
|
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.unused,
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .mem, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
|
||||||
|
.{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
|
||||||
|
.{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
|
||||||
|
.{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
|
||||||
|
.{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
|
||||||
|
.{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
|
||||||
|
.{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
|
||||||
|
.{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
|
||||||
|
.{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
|
||||||
|
.{ .@"2:", ._x, .mul, .tmp7q, .tmp6q, .leasi(.tmp1q, .@"8", .tmp2), ._ },
|
||||||
|
.{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
|
||||||
|
.{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
|
||||||
|
.{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
|
||||||
|
.{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
|
||||||
|
.{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
|
||||||
|
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.required_features = .{ .@"64bit", .slow_incdec, null, null },
|
||||||
|
.src_constraints = .{
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.any,
|
||||||
|
},
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mem, .to_mem, .none } },
|
||||||
|
},
|
||||||
|
.extra_temps = .{
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rax } },
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rdx } },
|
||||||
|
.unused,
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .mem, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
|
||||||
|
.{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
|
||||||
|
.{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
|
||||||
|
.{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
|
||||||
|
.{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
|
||||||
|
.{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
|
||||||
|
.{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
|
||||||
|
.{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
|
||||||
|
.{ .@"2:", ._, .mov, .tmp6q, .tmp3q, ._, ._ },
|
||||||
|
.{ ._, ._, .mul, .leasi(.tmp1q, .@"8", .tmp2), ._, ._, ._ },
|
||||||
|
.{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
|
||||||
|
.{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
|
||||||
|
.{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
|
||||||
|
.{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
|
||||||
|
.{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
|
||||||
|
.{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
|
||||||
|
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
|
.required_features = .{ .@"64bit", null, null, null },
|
||||||
|
.src_constraints = .{
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
|
||||||
|
.any,
|
||||||
|
},
|
||||||
|
.patterns = &.{
|
||||||
|
.{ .src = .{ .to_mem, .to_mem, .none } },
|
||||||
|
},
|
||||||
|
.extra_temps = .{
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rax } },
|
||||||
|
.{ .type = .u64, .kind = .{ .reg = .rdx } },
|
||||||
|
.unused,
|
||||||
|
},
|
||||||
|
.dst_temps = .{ .mem, .unused },
|
||||||
|
.clobbers = .{ .eflags = true },
|
||||||
|
.each = .{ .once = &.{
|
||||||
|
.{ ._, ._, .mov, .tmp0d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
|
||||||
|
.{ ._, ._, .lea, .tmp1p, .memd(.src1, 8), ._, ._ },
|
||||||
|
.{ .@"0:", ._, .lea, .tmp2p, .leaa(.tmp0, .sub_src0_size_div_8), ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp4d, .tmp4d, ._, ._ },
|
||||||
|
.{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ },
|
||||||
|
.{ ._, ._, .@"or", .tmp3q, .memsi(.src0q, .@"8", .tmp0), ._, ._ },
|
||||||
|
.{ ._, ._nz, .j, .@"2f", ._, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
|
||||||
|
.{ ._, ._mp, .j, .@"3f", ._, ._, ._ },
|
||||||
|
.{ .@"1:", ._, .adc, .tmp7q, .memsia(.dst0q, .@"8", .tmp2, .add_size), ._, ._ },
|
||||||
|
.{ ._, ._, .adc, .tmp4b, .si(0), ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .tmp5q, .tmp7q, ._, ._ },
|
||||||
|
.{ .@"2:", ._, .mov, .tmp6q, .tmp3q, ._, ._ },
|
||||||
|
.{ ._, ._, .mul, .leasi(.tmp1q, .@"8", .tmp2), ._, ._, ._ },
|
||||||
|
.{ ._, ._l, .sh, .tmp4b, .ui(4), ._, ._ },
|
||||||
|
.{ ._, ._, .adc, .tmp6q, .tmp5q, ._, ._ },
|
||||||
|
.{ ._, ._, .mov, .memsia(.dst0q, .@"8", .tmp2, .add_size), .tmp6q, ._, ._ },
|
||||||
|
.{ ._, ._c, .in, .tmp2p, ._, ._, ._ },
|
||||||
|
.{ ._, ._nz, .j, .@"1b", ._, ._, ._ },
|
||||||
|
.{ .@"3:", ._, .lea, .tmp1p, .lead(.tmp1, 8), ._, ._ },
|
||||||
|
.{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
|
||||||
|
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
|
||||||
|
} },
|
||||||
|
}, .{
|
||||||
.required_features = .{ .f16c, null, null, null },
|
.required_features = .{ .f16c, null, null, null },
|
||||||
.src_constraints = .{
|
.src_constraints = .{
|
||||||
.{ .scalar_float = .{ .of = .word, .is = .word } },
|
.{ .scalar_float = .{ .of = .word, .is = .word } },
|
||||||
@ -6890,7 +7257,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
|||||||
} }) catch |err| switch (err) {
|
} }) catch |err| switch (err) {
|
||||||
error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
|
error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{
|
||||||
@tagName(air_tag),
|
@tagName(air_tag),
|
||||||
cg.typeOf(bin_op.lhs).fmt(pt),
|
ty.fmt(pt),
|
||||||
ops[0].tracking(cg),
|
ops[0].tracking(cg),
|
||||||
ops[1].tracking(cg),
|
ops[1].tracking(cg),
|
||||||
}),
|
}),
|
||||||
@ -92700,7 +93067,7 @@ const Select = struct {
|
|||||||
const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] };
|
const mir_tag: Mir.Inst.FixedTag = .{ inst[1], inst[2] };
|
||||||
pseudo: {
|
pseudo: {
|
||||||
switch (inst[0]) {
|
switch (inst[0]) {
|
||||||
.@"0:", .@"1:", .@"2:" => |label| s.emitLabel(label),
|
.@"0:", .@"1:", .@"2:", .@"3:" => |label| s.emitLabel(label),
|
||||||
._ => {},
|
._ => {},
|
||||||
.pseudo => break :pseudo,
|
.pseudo => break :pseudo,
|
||||||
}
|
}
|
||||||
@ -93578,7 +93945,7 @@ const Select = struct {
|
|||||||
Select.Operand,
|
Select.Operand,
|
||||||
Select.Operand,
|
Select.Operand,
|
||||||
};
|
};
|
||||||
const Label = enum { @"0:", @"1:", @"2:", @"_", pseudo };
|
const Label = enum { @"0:", @"1:", @"2:", @"3:", @"_", pseudo };
|
||||||
const Operand = struct {
|
const Operand = struct {
|
||||||
flags: packed struct(u16) {
|
flags: packed struct(u16) {
|
||||||
tag: Tag,
|
tag: Tag,
|
||||||
@ -93609,6 +93976,7 @@ const Select = struct {
|
|||||||
ptr_size,
|
ptr_size,
|
||||||
ptr_bit_size,
|
ptr_bit_size,
|
||||||
size,
|
size,
|
||||||
|
src0_size,
|
||||||
delta_size,
|
delta_size,
|
||||||
delta_elem_size,
|
delta_elem_size,
|
||||||
size_add_elem_size,
|
size_add_elem_size,
|
||||||
@ -93641,6 +94009,8 @@ const Select = struct {
|
|||||||
const sub_size_div_8: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"8" };
|
const sub_size_div_8: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"8" };
|
||||||
const sub_size_div_4: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"4" };
|
const sub_size_div_4: Adjust = .{ .sign = .neg, .lhs = .size, .op = .div, .rhs = .@"4" };
|
||||||
const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" };
|
const sub_size: Adjust = .{ .sign = .neg, .lhs = .size, .op = .mul, .rhs = .@"1" };
|
||||||
|
const sub_src0_size_div_8: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .div, .rhs = .@"8" };
|
||||||
|
const sub_src0_size: Adjust = .{ .sign = .neg, .lhs = .src0_size, .op = .mul, .rhs = .@"1" };
|
||||||
const add_delta_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_size, .op = .div, .rhs = .@"8" };
|
const add_delta_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_size, .op = .div, .rhs = .@"8" };
|
||||||
const add_delta_elem_size: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .mul, .rhs = .@"1" };
|
const add_delta_elem_size: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .mul, .rhs = .@"1" };
|
||||||
const add_delta_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .div, .rhs = .@"8" };
|
const add_delta_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .delta_elem_size, .op = .div, .rhs = .@"8" };
|
||||||
@ -93882,6 +94252,8 @@ const Select = struct {
|
|||||||
const @"1f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp1, .size = .none } };
|
const @"1f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp1, .size = .none } };
|
||||||
const @"2b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp2, .size = .none } };
|
const @"2b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp2, .size = .none } };
|
||||||
const @"2f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp2, .size = .none } };
|
const @"2f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp2, .size = .none } };
|
||||||
|
const @"3b": Select.Operand = .{ .flags = .{ .tag = .backward_label }, .base = .{ .ref = .tmp3, .size = .none } };
|
||||||
|
const @"3f": Select.Operand = .{ .flags = .{ .tag = .forward_label }, .base = .{ .ref = .tmp3, .size = .none } };
|
||||||
|
|
||||||
const tmp0b: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0b };
|
const tmp0b: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0b };
|
||||||
const tmp0w: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0w };
|
const tmp0w: Select.Operand = .{ .flags = .{ .tag = .ref }, .base = .tmp0w };
|
||||||
@ -94070,6 +94442,13 @@ const Select = struct {
|
|||||||
.base = base,
|
.base = base,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
fn leaad(base: Ref.Sized, adjust: Adjust, disp: i32) Select.Operand {
|
||||||
|
return .{
|
||||||
|
.flags = .{ .tag = .lea, .adjust = adjust },
|
||||||
|
.base = base,
|
||||||
|
.imm = disp,
|
||||||
|
};
|
||||||
|
}
|
||||||
fn lead(base: Ref.Sized, disp: i32) Select.Operand {
|
fn lead(base: Ref.Sized, disp: i32) Select.Operand {
|
||||||
return .{
|
return .{
|
||||||
.flags = .{ .tag = .lea },
|
.flags = .{ .tag = .lea },
|
||||||
@ -94226,6 +94605,7 @@ const Select = struct {
|
|||||||
.ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8),
|
.ptr_size => @divExact(s.cg.target.ptrBitWidth(), 8),
|
||||||
.ptr_bit_size => s.cg.target.ptrBitWidth(),
|
.ptr_bit_size => s.cg.target.ptrBitWidth(),
|
||||||
.size => @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu)),
|
.size => @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu)),
|
||||||
|
.src0_size => @intCast(Select.Operand.Ref.src0.typeOf(s).abiSize(s.cg.pt.zcu)),
|
||||||
.delta_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu))) -
|
.delta_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).abiSize(s.cg.pt.zcu))) -
|
||||||
@as(SignedImm, @intCast(op.index.ref.typeOf(s).abiSize(s.cg.pt.zcu)))),
|
@as(SignedImm, @intCast(op.index.ref.typeOf(s).abiSize(s.cg.pt.zcu)))),
|
||||||
.delta_elem_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))) -
|
.delta_elem_size => @intCast(@as(SignedImm, @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))) -
|
||||||
|
|||||||
@ -88,13 +88,32 @@ pub fn emitMir(emit: *Emit) Error!void {
|
|||||||
lowered_relocs[0].lowered_inst_index == lowered_index) : ({
|
lowered_relocs[0].lowered_inst_index == lowered_index) : ({
|
||||||
lowered_relocs = lowered_relocs[1..];
|
lowered_relocs = lowered_relocs[1..];
|
||||||
}) switch (lowered_relocs[0].target) {
|
}) switch (lowered_relocs[0].target) {
|
||||||
.inst => |target| try relocs.append(emit.lower.allocator, .{
|
.inst => |target| {
|
||||||
.source = start_offset,
|
const inst_length: u4 = @intCast(end_offset - start_offset);
|
||||||
.source_offset = end_offset - 4,
|
const reloc_offset, const reloc_length = reloc_offset_length: {
|
||||||
.target = target,
|
var reloc_offset = inst_length;
|
||||||
.target_offset = lowered_relocs[0].off,
|
var op_index: usize = lowered_inst.ops.len;
|
||||||
.length = @intCast(end_offset - start_offset),
|
while (true) {
|
||||||
}),
|
op_index -= 1;
|
||||||
|
const op = lowered_inst.encoding.data.ops[op_index];
|
||||||
|
if (op == .none) continue;
|
||||||
|
const enc_length: u4 = @intCast(
|
||||||
|
std.math.divCeil(u7, @intCast(op.immBitSize()), 8) catch unreachable,
|
||||||
|
);
|
||||||
|
reloc_offset -= enc_length;
|
||||||
|
if (op_index == lowered_relocs[0].op_index)
|
||||||
|
break :reloc_offset_length .{ reloc_offset, enc_length };
|
||||||
|
}
|
||||||
|
};
|
||||||
|
try relocs.append(emit.lower.allocator, .{
|
||||||
|
.inst_offset = start_offset,
|
||||||
|
.inst_length = inst_length,
|
||||||
|
.source_offset = reloc_offset,
|
||||||
|
.source_length = reloc_length,
|
||||||
|
.target = target,
|
||||||
|
.target_offset = lowered_relocs[0].off,
|
||||||
|
});
|
||||||
|
},
|
||||||
.table => try table_relocs.append(emit.lower.allocator, .{
|
.table => try table_relocs.append(emit.lower.allocator, .{
|
||||||
.source_offset = end_offset - 4,
|
.source_offset = end_offset - 4,
|
||||||
.target_offset = lowered_relocs[0].off,
|
.target_offset = lowered_relocs[0].off,
|
||||||
@ -409,7 +428,7 @@ pub fn emitMir(emit: *Emit) Error!void {
|
|||||||
} } };
|
} } };
|
||||||
},
|
},
|
||||||
.pseudo_dbg_local_am => loc: {
|
.pseudo_dbg_local_am => loc: {
|
||||||
const mem = emit.lower.mem(mir_inst.data.ax.payload);
|
const mem = emit.lower.mem(undefined, mir_inst.data.ax.payload);
|
||||||
break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{
|
break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{
|
||||||
base: {
|
base: {
|
||||||
loc_buf[0] = switch (mem.base()) {
|
loc_buf[0] = switch (mem.base()) {
|
||||||
@ -466,15 +485,18 @@ pub fn emitMir(emit: *Emit) Error!void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
for (relocs.items) |reloc| {
|
||||||
// TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size.
|
const target = code_offset_mapping[reloc.target];
|
||||||
// This should be reversed like it is done in aarch64 MIR emit code: start with the smallest
|
const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.inst_offset + reloc.inst_length)) + reloc.target_offset;
|
||||||
// possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution
|
const inst_bytes = emit.code.items[reloc.inst_offset..][0..reloc.inst_length];
|
||||||
// until the entire decl is correctly emitted with all JMP/CALL instructions within range.
|
switch (reloc.source_length) {
|
||||||
for (relocs.items) |reloc| {
|
else => unreachable,
|
||||||
const target = code_offset_mapping[reloc.target];
|
inline 1, 4 => |source_length| std.mem.writeInt(
|
||||||
const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.source + reloc.length)) + reloc.target_offset;
|
@Type(.{ .int = .{ .signedness = .signed, .bits = @as(u16, 8) * source_length } }),
|
||||||
std.mem.writeInt(i32, emit.code.items[reloc.source_offset..][0..4], @intCast(disp), .little);
|
inst_bytes[reloc.source_offset..][0..source_length],
|
||||||
|
@intCast(disp),
|
||||||
|
.little,
|
||||||
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (emit.lower.mir.table.len > 0) {
|
if (emit.lower.mir.table.len > 0) {
|
||||||
@ -511,15 +533,17 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) Error {
|
|||||||
|
|
||||||
const Reloc = struct {
|
const Reloc = struct {
|
||||||
/// Offset of the instruction.
|
/// Offset of the instruction.
|
||||||
source: u32,
|
inst_offset: u32,
|
||||||
|
/// Length of the instruction.
|
||||||
|
inst_length: u4,
|
||||||
/// Offset of the relocation within the instruction.
|
/// Offset of the relocation within the instruction.
|
||||||
source_offset: u32,
|
source_offset: u4,
|
||||||
|
/// Length of the relocation.
|
||||||
|
source_length: u4,
|
||||||
/// Target of the relocation.
|
/// Target of the relocation.
|
||||||
target: Mir.Inst.Index,
|
target: Mir.Inst.Index,
|
||||||
/// Offset from the target instruction.
|
/// Offset from the target.
|
||||||
target_offset: i32,
|
target_offset: i32,
|
||||||
/// Length of the instruction.
|
|
||||||
length: u5,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const TableReloc = struct {
|
const TableReloc = struct {
|
||||||
|
|||||||
@ -304,20 +304,20 @@ pub const Mnemonic = enum {
|
|||||||
jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz,
|
jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz,
|
||||||
lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne,
|
lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne,
|
||||||
lods, lodsb, lodsd, lodsq, lodsw,
|
lods, lodsb, lodsd, lodsq, lodsw,
|
||||||
lsl, ltr, lzcnt,
|
lsl, ltr,
|
||||||
mfence, mov, movbe,
|
mfence, mov, movbe,
|
||||||
movs, movsb, movsd, movsq, movsw,
|
movs, movsb, movsd, movsq, movsw,
|
||||||
movsx, movsxd, movzx, mul,
|
movsx, movsxd, movzx, mul,
|
||||||
neg, nop, not,
|
neg, nop, not,
|
||||||
@"or", out, outs, outsb, outsd, outsw,
|
@"or", out, outs, outsb, outsd, outsw,
|
||||||
pause, pop, popcnt, popf, popfd, popfq, push, pushfq,
|
pause, pop, popf, popfd, popfq, push, pushfq,
|
||||||
rcl, rcr,
|
rcl, rcr,
|
||||||
rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp,
|
rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp,
|
||||||
ret, rol, ror, rorx, rsm,
|
ret, rol, ror, rsm,
|
||||||
sahf, sal, sar, sarx, sbb,
|
sahf, sal, sar, sbb,
|
||||||
scas, scasb, scasd, scasq, scasw,
|
scas, scasb, scasd, scasq, scasw,
|
||||||
senduipi, serialize,
|
senduipi, serialize,
|
||||||
shl, shld, shlx, shr, shrd, shrx,
|
shl, shld, shr, shrd,
|
||||||
stac, stc, std, sti, str, stui,
|
stac, stc, std, sti, str, stui,
|
||||||
sub, swapgs, syscall, sysenter, sysexit, sysret,
|
sub, swapgs, syscall, sysenter, sysexit, sysret,
|
||||||
seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
|
seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
|
||||||
@ -433,6 +433,8 @@ pub const Mnemonic = enum {
|
|||||||
roundpd, roundps, roundsd, roundss,
|
roundpd, roundps, roundsd, roundss,
|
||||||
// SSE4.2
|
// SSE4.2
|
||||||
crc32, pcmpgtq,
|
crc32, pcmpgtq,
|
||||||
|
// ABM
|
||||||
|
lzcnt, popcnt,
|
||||||
// PCLMUL
|
// PCLMUL
|
||||||
pclmulqdq,
|
pclmulqdq,
|
||||||
// AES
|
// AES
|
||||||
@ -440,7 +442,6 @@ pub const Mnemonic = enum {
|
|||||||
// SHA
|
// SHA
|
||||||
sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2,
|
sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2,
|
||||||
// AVX
|
// AVX
|
||||||
andn, bextr, blsi, blsmsk, blsr, bzhi, tzcnt,
|
|
||||||
vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps,
|
vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps,
|
||||||
vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist,
|
vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist,
|
||||||
vandnpd, vandnps, vandpd, vandps,
|
vandnpd, vandnps, vandpd, vandps,
|
||||||
@ -506,6 +507,10 @@ pub const Mnemonic = enum {
|
|||||||
vtestpd, vtestps,
|
vtestpd, vtestps,
|
||||||
vucomisd, vucomiss, vunpckhpd, vunpckhps, vunpcklpd, vunpcklps,
|
vucomisd, vucomiss, vunpckhpd, vunpckhps, vunpcklpd, vunpcklps,
|
||||||
vxorpd, vxorps,
|
vxorpd, vxorps,
|
||||||
|
// BMI
|
||||||
|
andn, bextr, blsi, blsmsk, blsr, tzcnt,
|
||||||
|
// BMI2
|
||||||
|
bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx,
|
||||||
// F16C
|
// F16C
|
||||||
vcvtph2ps, vcvtps2ph,
|
vcvtph2ps, vcvtps2ph,
|
||||||
// FMA
|
// FMA
|
||||||
|
|||||||
@ -10,32 +10,38 @@ mir: Mir,
|
|||||||
cc: std.builtin.CallingConvention,
|
cc: std.builtin.CallingConvention,
|
||||||
err_msg: ?*Zcu.ErrorMsg = null,
|
err_msg: ?*Zcu.ErrorMsg = null,
|
||||||
src_loc: Zcu.LazySrcLoc,
|
src_loc: Zcu.LazySrcLoc,
|
||||||
result_insts_len: u8 = undefined,
|
result_insts_len: ResultInstIndex = undefined,
|
||||||
result_relocs_len: u8 = undefined,
|
result_insts: [max_result_insts]Instruction = undefined,
|
||||||
result_insts: [
|
result_relocs_len: ResultRelocIndex = undefined,
|
||||||
@max(
|
result_relocs: [max_result_relocs]Reloc = undefined,
|
||||||
1, // non-pseudo instructions
|
|
||||||
3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
|
const max_result_insts = @max(
|
||||||
2, // cmovcc: cmovcc \ cmovcc
|
1, // non-pseudo instructions
|
||||||
3, // setcc: setcc \ setcc \ logicop
|
3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
|
||||||
2, // jcc: jcc \ jcc
|
2, // cmovcc: cmovcc \ cmovcc
|
||||||
pseudo_probe_align_insts,
|
3, // setcc: setcc \ setcc \ logicop
|
||||||
pseudo_probe_adjust_unrolled_max_insts,
|
2, // jcc: jcc \ jcc
|
||||||
pseudo_probe_adjust_setup_insts,
|
pseudo_probe_align_insts,
|
||||||
pseudo_probe_adjust_loop_insts,
|
pseudo_probe_adjust_unrolled_max_insts,
|
||||||
abi.Win64.callee_preserved_regs.len * 2, // push_regs/pop_regs
|
pseudo_probe_adjust_setup_insts,
|
||||||
abi.SysV.callee_preserved_regs.len * 2, // push_regs/pop_regs
|
pseudo_probe_adjust_loop_insts,
|
||||||
)
|
abi.Win64.callee_preserved_regs.len * 2, // push_regs/pop_regs
|
||||||
]Instruction = undefined,
|
abi.SysV.callee_preserved_regs.len * 2, // push_regs/pop_regs
|
||||||
result_relocs: [
|
);
|
||||||
@max(
|
const max_result_relocs = @max(
|
||||||
1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
|
1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea
|
||||||
2, // jcc: jcc \ jcc
|
2, // jcc: jcc \ jcc
|
||||||
2, // test \ jcc \ probe \ sub \ jmp
|
2, // test \ jcc \ probe \ sub \ jmp
|
||||||
1, // probe \ sub \ jcc
|
1, // probe \ sub \ jcc
|
||||||
3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
|
3, // (ELF only) TLS local dynamic (LD) sequence in PIC mode
|
||||||
)
|
);
|
||||||
]Reloc = undefined,
|
|
||||||
|
const ResultInstIndex = std.math.IntFittingRange(0, max_result_insts - 1);
|
||||||
|
const ResultRelocIndex = std.math.IntFittingRange(0, max_result_relocs - 1);
|
||||||
|
const InstOpIndex = std.math.IntFittingRange(
|
||||||
|
0,
|
||||||
|
@typeInfo(@FieldType(Instruction, "ops")).array.len - 1,
|
||||||
|
);
|
||||||
|
|
||||||
pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp
|
pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp
|
||||||
pub const pseudo_probe_adjust_unrolled_max_insts =
|
pub const pseudo_probe_adjust_unrolled_max_insts =
|
||||||
@ -51,7 +57,8 @@ pub const Error = error{
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub const Reloc = struct {
|
pub const Reloc = struct {
|
||||||
lowered_inst_index: u8,
|
lowered_inst_index: ResultInstIndex,
|
||||||
|
op_index: InstOpIndex,
|
||||||
target: Target,
|
target: Target,
|
||||||
off: i32,
|
off: i32,
|
||||||
|
|
||||||
@ -114,11 +121,11 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
|||||||
assert(inst.data.rx.fixes == ._);
|
assert(inst.data.rx.fixes == ._);
|
||||||
try lower.emit(.none, .cmovnz, &.{
|
try lower.emit(.none, .cmovnz, &.{
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload) },
|
.{ .mem = lower.mem(1, inst.data.rx.payload) },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .cmovp, &.{
|
try lower.emit(.none, .cmovp, &.{
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload) },
|
.{ .mem = lower.mem(1, inst.data.rx.payload) },
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
.pseudo_set_z_and_np_r => {
|
.pseudo_set_z_and_np_r => {
|
||||||
@ -137,13 +144,13 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
|||||||
.pseudo_set_z_and_np_m => {
|
.pseudo_set_z_and_np_m => {
|
||||||
assert(inst.data.rx.fixes == ._);
|
assert(inst.data.rx.fixes == ._);
|
||||||
try lower.emit(.none, .setz, &.{
|
try lower.emit(.none, .setz, &.{
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload) },
|
.{ .mem = lower.mem(0, inst.data.rx.payload) },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .setnp, &.{
|
try lower.emit(.none, .setnp, &.{
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .@"and", &.{
|
try lower.emit(.none, .@"and", &.{
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload) },
|
.{ .mem = lower.mem(0, inst.data.rx.payload) },
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
@ -163,32 +170,32 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
|||||||
.pseudo_set_nz_or_p_m => {
|
.pseudo_set_nz_or_p_m => {
|
||||||
assert(inst.data.rx.fixes == ._);
|
assert(inst.data.rx.fixes == ._);
|
||||||
try lower.emit(.none, .setnz, &.{
|
try lower.emit(.none, .setnz, &.{
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload) },
|
.{ .mem = lower.mem(0, inst.data.rx.payload) },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .setp, &.{
|
try lower.emit(.none, .setp, &.{
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .@"or", &.{
|
try lower.emit(.none, .@"or", &.{
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload) },
|
.{ .mem = lower.mem(0, inst.data.rx.payload) },
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
.pseudo_j_z_and_np_inst => {
|
.pseudo_j_z_and_np_inst => {
|
||||||
assert(inst.data.inst.fixes == ._);
|
assert(inst.data.inst.fixes == ._);
|
||||||
try lower.emit(.none, .jnz, &.{
|
try lower.emit(.none, .jnz, &.{
|
||||||
.{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) },
|
.{ .imm = lower.reloc(0, .{ .inst = index + 1 }, 0) },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .jnp, &.{
|
try lower.emit(.none, .jnp, &.{
|
||||||
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) },
|
.{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
.pseudo_j_nz_or_p_inst => {
|
.pseudo_j_nz_or_p_inst => {
|
||||||
assert(inst.data.inst.fixes == ._);
|
assert(inst.data.inst.fixes == ._);
|
||||||
try lower.emit(.none, .jnz, &.{
|
try lower.emit(.none, .jnz, &.{
|
||||||
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) },
|
.{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .jp, &.{
|
try lower.emit(.none, .jp, &.{
|
||||||
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) },
|
.{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -198,7 +205,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
|||||||
.{ .imm = .s(@bitCast(inst.data.ri.i)) },
|
.{ .imm = .s(@bitCast(inst.data.ri.i)) },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .jz, &.{
|
try lower.emit(.none, .jz, &.{
|
||||||
.{ .imm = lower.reloc(.{ .inst = index + 1 }, 0) },
|
.{ .imm = lower.reloc(0, .{ .inst = index + 1 }, 0) },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .lea, &.{
|
try lower.emit(.none, .lea, &.{
|
||||||
.{ .reg = inst.data.ri.r1 },
|
.{ .reg = inst.data.ri.r1 },
|
||||||
@ -214,7 +221,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
|||||||
.{ .reg = inst.data.ri.r1.to32() },
|
.{ .reg = inst.data.ri.r1.to32() },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .jmp, &.{
|
try lower.emit(.none, .jmp, &.{
|
||||||
.{ .imm = lower.reloc(.{ .inst = index }, 0) },
|
.{ .imm = lower.reloc(0, .{ .inst = index }, 0) },
|
||||||
});
|
});
|
||||||
assert(lower.result_insts_len == pseudo_probe_align_insts);
|
assert(lower.result_insts_len == pseudo_probe_align_insts);
|
||||||
},
|
},
|
||||||
@ -260,7 +267,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct {
|
|||||||
.{ .imm = .s(page_size) },
|
.{ .imm = .s(page_size) },
|
||||||
});
|
});
|
||||||
try lower.emit(.none, .jae, &.{
|
try lower.emit(.none, .jae, &.{
|
||||||
.{ .imm = lower.reloc(.{ .inst = index }, 0) },
|
.{ .imm = lower.reloc(0, .{ .inst = index }, 0) },
|
||||||
});
|
});
|
||||||
assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts);
|
assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts);
|
||||||
},
|
},
|
||||||
@ -382,21 +389,22 @@ pub fn imm(lower: *const Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn mem(lower: *Lower, payload: u32) Memory {
|
pub fn mem(lower: *Lower, op_index: InstOpIndex, payload: u32) Memory {
|
||||||
var m = lower.mir.resolveFrameLoc(lower.mir.extraData(Mir.Memory, payload).data).decode();
|
var m = lower.mir.resolveFrameLoc(lower.mir.extraData(Mir.Memory, payload).data).decode();
|
||||||
switch (m) {
|
switch (m) {
|
||||||
.sib => |*sib| switch (sib.base) {
|
.sib => |*sib| switch (sib.base) {
|
||||||
else => {},
|
else => {},
|
||||||
.table => sib.disp = lower.reloc(.table, sib.disp).signed,
|
.table => sib.disp = lower.reloc(op_index, .table, sib.disp).signed,
|
||||||
},
|
},
|
||||||
else => {},
|
else => {},
|
||||||
}
|
}
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn reloc(lower: *Lower, target: Reloc.Target, off: i32) Immediate {
|
fn reloc(lower: *Lower, op_index: InstOpIndex, target: Reloc.Target, off: i32) Immediate {
|
||||||
lower.result_relocs[lower.result_relocs_len] = .{
|
lower.result_relocs[lower.result_relocs_len] = .{
|
||||||
.lowered_inst_index = lower.result_insts_len,
|
.lowered_inst_index = lower.result_insts_len,
|
||||||
|
.op_index = op_index,
|
||||||
.target = target,
|
.target = target,
|
||||||
.off = off,
|
.off = off,
|
||||||
};
|
};
|
||||||
@ -409,7 +417,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
var emit_mnemonic = mnemonic;
|
var emit_mnemonic = mnemonic;
|
||||||
var emit_ops_storage: [4]Operand = undefined;
|
var emit_ops_storage: [4]Operand = undefined;
|
||||||
const emit_ops = emit_ops_storage[0..ops.len];
|
const emit_ops = emit_ops_storage[0..ops.len];
|
||||||
for (emit_ops, ops) |*emit_op, op| {
|
for (emit_ops, ops, 0..) |*emit_op, op, op_index| {
|
||||||
emit_op.* = switch (op) {
|
emit_op.* = switch (op) {
|
||||||
else => op,
|
else => op,
|
||||||
.mem => |mem_op| switch (mem_op.base()) {
|
.mem => |mem_op| switch (mem_op.base()) {
|
||||||
@ -428,20 +436,20 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
if (lower.pic) {
|
if (lower.pic) {
|
||||||
// Here, we currently assume local dynamic TLS vars, and so
|
// Here, we currently assume local dynamic TLS vars, and so
|
||||||
// we emit LD model.
|
// we emit LD model.
|
||||||
_ = lower.reloc(.{ .linker_tlsld = sym_index }, 0);
|
_ = lower.reloc(1, .{ .linker_tlsld = sym_index }, 0);
|
||||||
lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{
|
lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{
|
||||||
.{ .reg = .rdi },
|
.{ .reg = .rdi },
|
||||||
.{ .mem = Memory.initRip(.none, 0) },
|
.{ .mem = Memory.initRip(.none, 0) },
|
||||||
}, lower.target);
|
}, lower.target);
|
||||||
lower.result_insts_len += 1;
|
lower.result_insts_len += 1;
|
||||||
_ = lower.reloc(.{
|
_ = lower.reloc(0, .{
|
||||||
.linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null),
|
.linker_extern_fn = try elf_file.getGlobalSymbol("__tls_get_addr", null),
|
||||||
}, 0);
|
}, 0);
|
||||||
lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{
|
lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{
|
||||||
.{ .imm = .s(0) },
|
.{ .imm = .s(0) },
|
||||||
}, lower.target);
|
}, lower.target);
|
||||||
lower.result_insts_len += 1;
|
lower.result_insts_len += 1;
|
||||||
_ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0);
|
_ = lower.reloc(@intCast(op_index), .{ .linker_dtpoff = sym_index }, 0);
|
||||||
emit_mnemonic = .lea;
|
emit_mnemonic = .lea;
|
||||||
break :op .{ .mem = Memory.initSib(.none, .{
|
break :op .{ .mem = Memory.initSib(.none, .{
|
||||||
.base = .{ .reg = .rax },
|
.base = .{ .reg = .rax },
|
||||||
@ -454,7 +462,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
.{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) },
|
.{ .mem = Memory.initSib(.qword, .{ .base = .{ .reg = .fs } }) },
|
||||||
}, lower.target);
|
}, lower.target);
|
||||||
lower.result_insts_len += 1;
|
lower.result_insts_len += 1;
|
||||||
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
|
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
|
||||||
emit_mnemonic = .lea;
|
emit_mnemonic = .lea;
|
||||||
break :op .{ .mem = Memory.initSib(.none, .{
|
break :op .{ .mem = Memory.initSib(.none, .{
|
||||||
.base = .{ .reg = .rax },
|
.base = .{ .reg = .rax },
|
||||||
@ -463,15 +471,17 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
|
|
||||||
if (lower.pic) switch (mnemonic) {
|
if (lower.pic) switch (mnemonic) {
|
||||||
.lea => if (elf_sym.flags.is_extern_ptr) {
|
.lea => {
|
||||||
|
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
|
||||||
|
if (!elf_sym.flags.is_extern_ptr) break :op .{ .mem = Memory.initRip(.none, 0) };
|
||||||
emit_mnemonic = .mov;
|
emit_mnemonic = .mov;
|
||||||
break :op .{ .mem = Memory.initRip(.ptr, 0) };
|
break :op .{ .mem = Memory.initRip(.ptr, 0) };
|
||||||
} else break :op .{ .mem = Memory.initRip(.none, 0) },
|
},
|
||||||
.mov => {
|
.mov => {
|
||||||
if (elf_sym.flags.is_extern_ptr) {
|
if (elf_sym.flags.is_extern_ptr) {
|
||||||
const reg = ops[0].reg;
|
const reg = ops[0].reg;
|
||||||
|
_ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
|
||||||
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
|
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
|
||||||
.{ .reg = reg.to64() },
|
.{ .reg = reg.to64() },
|
||||||
.{ .mem = Memory.initRip(.qword, 0) },
|
.{ .mem = Memory.initRip(.qword, 0) },
|
||||||
@ -481,10 +491,13 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
.reg = reg.to64(),
|
.reg = reg.to64(),
|
||||||
} }) };
|
} }) };
|
||||||
}
|
}
|
||||||
|
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
|
||||||
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
|
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
|
||||||
},
|
},
|
||||||
else => unreachable,
|
else => unreachable,
|
||||||
} else switch (mnemonic) {
|
};
|
||||||
|
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
|
||||||
|
switch (mnemonic) {
|
||||||
.call => break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
|
.call => break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{
|
||||||
.base = .{ .reg = .ds },
|
.base = .{ .reg = .ds },
|
||||||
}) },
|
}) },
|
||||||
@ -502,7 +515,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
const macho_sym = zo.symbols.items[sym_index];
|
const macho_sym = zo.symbols.items[sym_index];
|
||||||
|
|
||||||
if (macho_sym.flags.tlv) {
|
if (macho_sym.flags.tlv) {
|
||||||
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
|
_ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
|
||||||
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
|
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
|
||||||
.{ .reg = .rdi },
|
.{ .reg = .rdi },
|
||||||
.{ .mem = Memory.initRip(.ptr, 0) },
|
.{ .mem = Memory.initRip(.ptr, 0) },
|
||||||
@ -516,15 +529,17 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
break :op .{ .reg = .rax };
|
break :op .{ .reg = .rax };
|
||||||
}
|
}
|
||||||
|
|
||||||
_ = lower.reloc(.{ .linker_reloc = sym_index }, 0);
|
|
||||||
break :op switch (mnemonic) {
|
break :op switch (mnemonic) {
|
||||||
.lea => if (macho_sym.flags.is_extern_ptr) {
|
.lea => {
|
||||||
|
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
|
||||||
|
if (!macho_sym.flags.is_extern_ptr) break :op .{ .mem = Memory.initRip(.none, 0) };
|
||||||
emit_mnemonic = .mov;
|
emit_mnemonic = .mov;
|
||||||
break :op .{ .mem = Memory.initRip(.ptr, 0) };
|
break :op .{ .mem = Memory.initRip(.ptr, 0) };
|
||||||
} else break :op .{ .mem = Memory.initRip(.none, 0) },
|
},
|
||||||
.mov => {
|
.mov => {
|
||||||
if (macho_sym.flags.is_extern_ptr) {
|
if (macho_sym.flags.is_extern_ptr) {
|
||||||
const reg = ops[0].reg;
|
const reg = ops[0].reg;
|
||||||
|
_ = lower.reloc(1, .{ .linker_reloc = sym_index }, 0);
|
||||||
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
|
lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{
|
||||||
.{ .reg = reg.to64() },
|
.{ .reg = reg.to64() },
|
||||||
.{ .mem = Memory.initRip(.qword, 0) },
|
.{ .mem = Memory.initRip(.qword, 0) },
|
||||||
@ -534,6 +549,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
.reg = reg.to64(),
|
.reg = reg.to64(),
|
||||||
} }) };
|
} }) };
|
||||||
}
|
}
|
||||||
|
_ = lower.reloc(@intCast(op_index), .{ .linker_reloc = sym_index }, 0);
|
||||||
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
|
break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) };
|
||||||
},
|
},
|
||||||
else => unreachable,
|
else => unreachable,
|
||||||
@ -550,7 +566,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
||||||
@setEvalBranchQuota(2_400);
|
@setEvalBranchQuota(2_500);
|
||||||
const fixes = switch (inst.ops) {
|
const fixes = switch (inst.ops) {
|
||||||
.none => inst.data.none.fixes,
|
.none => inst.data.none.fixes,
|
||||||
.inst => inst.data.inst.fixes,
|
.inst => inst.data.inst.fixes,
|
||||||
@ -595,7 +611,7 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
|||||||
}, switch (inst.ops) {
|
}, switch (inst.ops) {
|
||||||
.none => &.{},
|
.none => &.{},
|
||||||
.inst => &.{
|
.inst => &.{
|
||||||
.{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }, 0) },
|
.{ .imm = lower.reloc(0, .{ .inst = inst.data.inst.inst }, 0) },
|
||||||
},
|
},
|
||||||
.i_s, .i_u => &.{
|
.i_s, .i_u => &.{
|
||||||
.{ .imm = lower.imm(inst.ops, inst.data.i.i) },
|
.{ .imm = lower.imm(inst.ops, inst.data.i.i) },
|
||||||
@ -642,10 +658,10 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
|||||||
.{ .imm = lower.imm(inst.ops, inst.data.rri.i) },
|
.{ .imm = lower.imm(inst.ops, inst.data.rri.i) },
|
||||||
},
|
},
|
||||||
.m => &.{
|
.m => &.{
|
||||||
.{ .mem = lower.mem(inst.data.x.payload) },
|
.{ .mem = lower.mem(0, inst.data.x.payload) },
|
||||||
},
|
},
|
||||||
.mi_s, .mi_u => &.{
|
.mi_s, .mi_u => &.{
|
||||||
.{ .mem = lower.mem(inst.data.x.payload + 1) },
|
.{ .mem = lower.mem(0, inst.data.x.payload + 1) },
|
||||||
.{ .imm = lower.imm(
|
.{ .imm = lower.imm(
|
||||||
inst.ops,
|
inst.ops,
|
||||||
lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm,
|
lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm,
|
||||||
@ -653,64 +669,64 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
|
|||||||
},
|
},
|
||||||
.rm => &.{
|
.rm => &.{
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload) },
|
.{ .mem = lower.mem(1, inst.data.rx.payload) },
|
||||||
},
|
},
|
||||||
.rmr => &.{
|
.rmr => &.{
|
||||||
.{ .reg = inst.data.rrx.r1 },
|
.{ .reg = inst.data.rrx.r1 },
|
||||||
.{ .mem = lower.mem(inst.data.rrx.payload) },
|
.{ .mem = lower.mem(1, inst.data.rrx.payload) },
|
||||||
.{ .reg = inst.data.rrx.r2 },
|
.{ .reg = inst.data.rrx.r2 },
|
||||||
},
|
},
|
||||||
.rmi => &.{
|
.rmi => &.{
|
||||||
.{ .reg = inst.data.rix.r1 },
|
.{ .reg = inst.data.rix.r1 },
|
||||||
.{ .mem = lower.mem(inst.data.rix.payload) },
|
.{ .mem = lower.mem(1, inst.data.rix.payload) },
|
||||||
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
|
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
|
||||||
},
|
},
|
||||||
.rmi_s, .rmi_u => &.{
|
.rmi_s, .rmi_u => &.{
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload + 1) },
|
.{ .mem = lower.mem(1, inst.data.rx.payload + 1) },
|
||||||
.{ .imm = lower.imm(
|
.{ .imm = lower.imm(
|
||||||
inst.ops,
|
inst.ops,
|
||||||
lower.mir.extraData(Mir.Imm32, inst.data.rx.payload).data.imm,
|
lower.mir.extraData(Mir.Imm32, inst.data.rx.payload).data.imm,
|
||||||
) },
|
) },
|
||||||
},
|
},
|
||||||
.mr => &.{
|
.mr => &.{
|
||||||
.{ .mem = lower.mem(inst.data.rx.payload) },
|
.{ .mem = lower.mem(0, inst.data.rx.payload) },
|
||||||
.{ .reg = inst.data.rx.r1 },
|
.{ .reg = inst.data.rx.r1 },
|
||||||
},
|
},
|
||||||
.mrr => &.{
|
.mrr => &.{
|
||||||
.{ .mem = lower.mem(inst.data.rrx.payload) },
|
.{ .mem = lower.mem(0, inst.data.rrx.payload) },
|
||||||
.{ .reg = inst.data.rrx.r1 },
|
.{ .reg = inst.data.rrx.r1 },
|
||||||
.{ .reg = inst.data.rrx.r2 },
|
.{ .reg = inst.data.rrx.r2 },
|
||||||
},
|
},
|
||||||
.mri => &.{
|
.mri => &.{
|
||||||
.{ .mem = lower.mem(inst.data.rix.payload) },
|
.{ .mem = lower.mem(0, inst.data.rix.payload) },
|
||||||
.{ .reg = inst.data.rix.r1 },
|
.{ .reg = inst.data.rix.r1 },
|
||||||
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
|
.{ .imm = lower.imm(inst.ops, inst.data.rix.i) },
|
||||||
},
|
},
|
||||||
.rrm => &.{
|
.rrm => &.{
|
||||||
.{ .reg = inst.data.rrx.r1 },
|
.{ .reg = inst.data.rrx.r1 },
|
||||||
.{ .reg = inst.data.rrx.r2 },
|
.{ .reg = inst.data.rrx.r2 },
|
||||||
.{ .mem = lower.mem(inst.data.rrx.payload) },
|
.{ .mem = lower.mem(2, inst.data.rrx.payload) },
|
||||||
},
|
},
|
||||||
.rrmr => &.{
|
.rrmr => &.{
|
||||||
.{ .reg = inst.data.rrrx.r1 },
|
.{ .reg = inst.data.rrrx.r1 },
|
||||||
.{ .reg = inst.data.rrrx.r2 },
|
.{ .reg = inst.data.rrrx.r2 },
|
||||||
.{ .mem = lower.mem(inst.data.rrrx.payload) },
|
.{ .mem = lower.mem(2, inst.data.rrrx.payload) },
|
||||||
.{ .reg = inst.data.rrrx.r3 },
|
.{ .reg = inst.data.rrrx.r3 },
|
||||||
},
|
},
|
||||||
.rrmi => &.{
|
.rrmi => &.{
|
||||||
.{ .reg = inst.data.rrix.r1 },
|
.{ .reg = inst.data.rrix.r1 },
|
||||||
.{ .reg = inst.data.rrix.r2 },
|
.{ .reg = inst.data.rrix.r2 },
|
||||||
.{ .mem = lower.mem(inst.data.rrix.payload) },
|
.{ .mem = lower.mem(2, inst.data.rrix.payload) },
|
||||||
.{ .imm = lower.imm(inst.ops, inst.data.rrix.i) },
|
.{ .imm = lower.imm(inst.ops, inst.data.rrix.i) },
|
||||||
},
|
},
|
||||||
.extern_fn_reloc, .rel => &.{
|
.extern_fn_reloc, .rel => &.{
|
||||||
.{ .imm = lower.reloc(.{ .linker_extern_fn = inst.data.reloc.sym_index }, inst.data.reloc.off) },
|
.{ .imm = lower.reloc(0, .{ .linker_extern_fn = inst.data.reloc.sym_index }, inst.data.reloc.off) },
|
||||||
},
|
},
|
||||||
.got_reloc, .direct_reloc, .import_reloc => ops: {
|
.got_reloc, .direct_reloc, .import_reloc => ops: {
|
||||||
const reg = inst.data.rx.r1;
|
const reg = inst.data.rx.r1;
|
||||||
const extra = lower.mir.extraData(bits.SymbolOffset, inst.data.rx.payload).data;
|
const extra = lower.mir.extraData(bits.SymbolOffset, inst.data.rx.payload).data;
|
||||||
_ = lower.reloc(switch (inst.ops) {
|
_ = lower.reloc(1, switch (inst.ops) {
|
||||||
.got_reloc => .{ .linker_got = extra.sym_index },
|
.got_reloc => .{ .linker_got = extra.sym_index },
|
||||||
.direct_reloc => .{ .linker_direct = extra.sym_index },
|
.direct_reloc => .{ .linker_direct = extra.sym_index },
|
||||||
.import_reloc => .{ .linker_import = extra.sym_index },
|
.import_reloc => .{ .linker_import = extra.sym_index },
|
||||||
|
|||||||
@ -100,6 +100,8 @@ pub const Inst = struct {
|
|||||||
/// ___ Division
|
/// ___ Division
|
||||||
_d,
|
_d,
|
||||||
|
|
||||||
|
/// ___ Without Affecting Flags
|
||||||
|
_x,
|
||||||
/// ___ Left
|
/// ___ Left
|
||||||
_l,
|
_l,
|
||||||
/// ___ Left Double
|
/// ___ Left Double
|
||||||
@ -483,6 +485,7 @@ pub const Inst = struct {
|
|||||||
/// ASCII adjust al after subtraction
|
/// ASCII adjust al after subtraction
|
||||||
aa,
|
aa,
|
||||||
/// Add with carry
|
/// Add with carry
|
||||||
|
/// Unsigned integer addition of two operands with carry flag
|
||||||
adc,
|
adc,
|
||||||
/// Add
|
/// Add
|
||||||
/// Add packed integers
|
/// Add packed integers
|
||||||
@ -1162,10 +1165,8 @@ pub const Inst = struct {
|
|||||||
fmadd231,
|
fmadd231,
|
||||||
|
|
||||||
// ADX
|
// ADX
|
||||||
/// Unsigned integer addition of two operands with carry flag
|
|
||||||
adcx,
|
|
||||||
/// Unsigned integer addition of two operands with overflow flag
|
/// Unsigned integer addition of two operands with overflow flag
|
||||||
adox,
|
ado,
|
||||||
|
|
||||||
// AESKLE
|
// AESKLE
|
||||||
/// Encode 128-bit key with key locker
|
/// Encode 128-bit key with key locker
|
||||||
|
|||||||
@ -405,9 +405,9 @@ pub const table = [_]Entry{
|
|||||||
.{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
|
.{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
|
||||||
.{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none },
|
.{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none },
|
||||||
.{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
|
.{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none },
|
||||||
.{ .jcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .short, .@"32bit" },
|
.{ .jcxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .short, .@"32bit" },
|
||||||
.{ .jecxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"32bit" },
|
.{ .jecxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .none, .@"32bit" },
|
||||||
.{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .@"64bit" },
|
.{ .jrcxz, .d, &.{ .rel8 }, &.{ 0xe3 }, 0, .none, .@"64bit" },
|
||||||
.{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none },
|
.{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none },
|
||||||
.{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none },
|
.{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none },
|
||||||
.{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none },
|
.{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none },
|
||||||
@ -477,10 +477,6 @@ pub const table = [_]Entry{
|
|||||||
|
|
||||||
.{ .ltr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 3, .none, .none },
|
.{ .ltr, .m, &.{ .rm16 }, &.{ 0x0f, 0x00 }, 3, .none, .none },
|
||||||
|
|
||||||
.{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
|
|
||||||
.{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt },
|
|
||||||
.{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt },
|
|
||||||
|
|
||||||
.{ .mfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none },
|
.{ .mfence, .z, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none },
|
||||||
|
|
||||||
.{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none },
|
.{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none },
|
||||||
@ -630,10 +626,6 @@ pub const table = [_]Entry{
|
|||||||
.{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none },
|
.{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none },
|
||||||
.{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none },
|
.{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none },
|
||||||
|
|
||||||
.{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt },
|
|
||||||
.{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt },
|
|
||||||
.{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt },
|
|
||||||
|
|
||||||
.{ .popf, .z, &.{}, &.{ 0x9d }, 0, .short, .none },
|
.{ .popf, .z, &.{}, &.{ 0x9d }, 0, .short, .none },
|
||||||
.{ .popfd, .z, &.{}, &.{ 0x9d }, 0, .none, .@"32bit" },
|
.{ .popfd, .z, &.{}, &.{ 0x9d }, 0, .none, .@"32bit" },
|
||||||
.{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .@"64bit" },
|
.{ .popfq, .z, &.{}, &.{ 0x9d }, 0, .none, .@"64bit" },
|
||||||
@ -1738,6 +1730,15 @@ pub const table = [_]Entry{
|
|||||||
|
|
||||||
.{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 },
|
.{ .pcmpgtq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x37 }, 0, .none, .sse4_2 },
|
||||||
|
|
||||||
|
// ABM
|
||||||
|
.{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt },
|
||||||
|
.{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt },
|
||||||
|
.{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt },
|
||||||
|
|
||||||
|
.{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt },
|
||||||
|
.{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt },
|
||||||
|
.{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt },
|
||||||
|
|
||||||
// PCLMUL
|
// PCLMUL
|
||||||
.{ .pclmulqdq, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .none, .pclmul },
|
.{ .pclmulqdq, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .none, .pclmul },
|
||||||
|
|
||||||
@ -1771,38 +1772,6 @@ pub const table = [_]Entry{
|
|||||||
.{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha },
|
.{ .sha256msg2, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x38, 0xcd }, 0, .none, .sha },
|
||||||
|
|
||||||
// AVX
|
// AVX
|
||||||
.{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi },
|
|
||||||
.{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi },
|
|
||||||
|
|
||||||
.{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi },
|
|
||||||
.{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi },
|
|
||||||
|
|
||||||
.{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi },
|
|
||||||
.{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi },
|
|
||||||
|
|
||||||
.{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi },
|
|
||||||
.{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi },
|
|
||||||
|
|
||||||
.{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi },
|
|
||||||
.{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi },
|
|
||||||
|
|
||||||
.{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
|
|
||||||
.{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
|
|
||||||
|
|
||||||
.{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 },
|
|
||||||
.{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 },
|
|
||||||
|
|
||||||
.{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
|
|
||||||
.{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
|
|
||||||
.{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
|
|
||||||
.{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
|
|
||||||
.{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
|
|
||||||
.{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
|
|
||||||
|
|
||||||
.{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
|
|
||||||
.{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi },
|
|
||||||
.{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi },
|
|
||||||
|
|
||||||
.{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
|
.{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx },
|
||||||
.{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
|
.{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx },
|
||||||
|
|
||||||
@ -2307,6 +2276,49 @@ pub const table = [_]Entry{
|
|||||||
.{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
|
.{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
|
||||||
.{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
|
.{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
|
||||||
|
|
||||||
|
// BMI
|
||||||
|
.{ .andn, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w0, .bmi },
|
||||||
|
.{ .andn, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf2 }, 0, .vex_lz_w1, .bmi },
|
||||||
|
|
||||||
|
.{ .bextr, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi },
|
||||||
|
.{ .bextr, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi },
|
||||||
|
|
||||||
|
.{ .blsi, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w0, .bmi },
|
||||||
|
.{ .blsi, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 3, .vex_lz_w1, .bmi },
|
||||||
|
|
||||||
|
.{ .blsmsk, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w0, .bmi },
|
||||||
|
.{ .blsmsk, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 2, .vex_lz_w1, .bmi },
|
||||||
|
|
||||||
|
.{ .blsr, .vm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w0, .bmi },
|
||||||
|
.{ .blsr, .vm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x38, 0xf3 }, 1, .vex_lz_w1, .bmi },
|
||||||
|
|
||||||
|
.{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi },
|
||||||
|
.{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi },
|
||||||
|
.{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi },
|
||||||
|
|
||||||
|
// BMI2
|
||||||
|
.{ .bzhi, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
|
||||||
|
.{ .bzhi, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
|
||||||
|
|
||||||
|
.{ .mulx, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf6 }, 0, .vex_lz_w0, .bmi2 },
|
||||||
|
.{ .mulx, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf6 }, 0, .vex_lz_w1, .bmi2 },
|
||||||
|
|
||||||
|
.{ .pdep, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
|
||||||
|
.{ .pdep, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
|
||||||
|
|
||||||
|
.{ .pext, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 },
|
||||||
|
.{ .pext, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 },
|
||||||
|
|
||||||
|
.{ .rorx, .rmi, &.{ .r32, .rm32, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w0, .bmi2 },
|
||||||
|
.{ .rorx, .rmi, &.{ .r64, .rm64, .imm8 }, &.{ 0xf2, 0x0f, 0x3a }, 0, .vex_lz_w1, .bmi2 },
|
||||||
|
|
||||||
|
.{ .sarx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
|
||||||
|
.{ .shlx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
|
||||||
|
.{ .shrx, .rmv, &.{ .r32, .rm32, .r32 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w0, .bmi2 },
|
||||||
|
.{ .sarx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf3, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
|
||||||
|
.{ .shlx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0x66, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
|
||||||
|
.{ .shrx, .rmv, &.{ .r64, .rm64, .r64 }, &.{ 0xf2, 0x0f, 0x38, 0xf7 }, 0, .vex_lz_w1, .bmi2 },
|
||||||
|
|
||||||
// F16C
|
// F16C
|
||||||
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
|
.{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
|
||||||
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
|
.{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
|
||||||
|
|||||||
@ -93,6 +93,11 @@ pub fn build(b: *std.Build) void {
|
|||||||
.cpu_arch = .x86_64,
|
.cpu_arch = .x86_64,
|
||||||
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
|
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
|
||||||
},
|
},
|
||||||
|
.{
|
||||||
|
.cpu_arch = .x86_64,
|
||||||
|
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v3 },
|
||||||
|
.cpu_features_add = std.Target.x86.featureSet(&.{.adx}),
|
||||||
|
},
|
||||||
.{
|
.{
|
||||||
.cpu_arch = .x86_64,
|
.cpu_arch = .x86_64,
|
||||||
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 },
|
.cpu_model = .{ .explicit = &std.Target.x86.cpu.x86_64_v4 },
|
||||||
|
|||||||
@ -44,6 +44,17 @@ fn AddOneBit(comptime Type: type) type {
|
|||||||
.vector => |vector| @Vector(vector.len, ResultScalar),
|
.vector => |vector| @Vector(vector.len, ResultScalar),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
fn DoubleBits(comptime Type: type) type {
|
||||||
|
const ResultScalar = switch (@typeInfo(Scalar(Type))) {
|
||||||
|
.int => |int| @Type(.{ .int = .{ .signedness = int.signedness, .bits = int.bits * 2 } }),
|
||||||
|
.float => Scalar(Type),
|
||||||
|
else => @compileError(@typeName(Type)),
|
||||||
|
};
|
||||||
|
return switch (@typeInfo(Type)) {
|
||||||
|
else => ResultScalar,
|
||||||
|
.vector => |vector| @Vector(vector.len, ResultScalar),
|
||||||
|
};
|
||||||
|
}
|
||||||
// inline to avoid a runtime `@splat`
|
// inline to avoid a runtime `@splat`
|
||||||
inline fn splat(comptime Type: type, scalar: Scalar(Type)) Type {
|
inline fn splat(comptime Type: type, scalar: Scalar(Type)) Type {
|
||||||
return switch (@typeInfo(Type)) {
|
return switch (@typeInfo(Type)) {
|
||||||
@ -16216,6 +16227,8 @@ fn binary(comptime op: anytype, comptime opts: struct { compare: Compare = .rela
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
fn testInts() !void {
|
fn testInts() !void {
|
||||||
|
try testArgs(i4, 0x3, 0x2);
|
||||||
|
try testArgs(u4, 0xe, 0x6);
|
||||||
try testArgs(i8, 0x48, 0x6c);
|
try testArgs(i8, 0x48, 0x6c);
|
||||||
try testArgs(u8, 0xbb, 0x43);
|
try testArgs(u8, 0xbb, 0x43);
|
||||||
try testArgs(i16, -0x0fdf, 0x302e);
|
try testArgs(i16, -0x0fdf, 0x302e);
|
||||||
@ -18993,6 +19006,15 @@ test subUnsafe {
|
|||||||
try test_sub_unsafe.testFloatVectors();
|
try test_sub_unsafe.testFloatVectors();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline fn mulUnsafe(comptime Type: type, lhs: Type, rhs: Type) DoubleBits(Type) {
|
||||||
|
@setRuntimeSafety(false);
|
||||||
|
return @as(DoubleBits(Type), lhs) * rhs;
|
||||||
|
}
|
||||||
|
test mulUnsafe {
|
||||||
|
const test_mul_unsafe = binary(mulUnsafe, .{});
|
||||||
|
try test_mul_unsafe.testInts();
|
||||||
|
}
|
||||||
|
|
||||||
inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs * rhs) {
|
inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs * rhs) {
|
||||||
if (@inComptime() and @typeInfo(Type) == .vector) {
|
if (@inComptime() and @typeInfo(Type) == .vector) {
|
||||||
// workaround https://github.com/ziglang/zig/issues/22743
|
// workaround https://github.com/ziglang/zig/issues/22743
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user