stage2 x86_64: simplify inst encoder to a set of dumb helper fns

This commit is contained in:
gracefu 2021-04-11 16:09:47 +08:00
parent 613f39eb62
commit 0409f9e024
No known key found for this signature in database
GPG Key ID: 2B0D39CC4E035325
2 changed files with 681 additions and 617 deletions

View File

@ -20,6 +20,8 @@ const build_options = @import("build_options");
const LazySrcLoc = Module.LazySrcLoc;
const RegisterManager = @import("register_manager.zig").RegisterManager;
const X8664Encoder = @import("codegen/x86_64.zig").Encoder;
/// The codegen-related data that is stored in `ir.Inst.Block` instructions.
pub const BlockData = struct {
relocs: std.ArrayListUnmanaged(Reloc) = undefined,
@ -1617,9 +1619,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
///
/// opcode | operand shape
/// --------+----------------------
/// 80 /opx | r/m8, imm8
/// 81 /opx | r/m16/32/64, imm16/32
/// 83 /opx | r/m16/32/64, imm8
/// 80 /opx | *r/m8*, imm8
/// 81 /opx | *r/m16/32/64*, imm16/32
/// 83 /opx | *r/m16/32/64*, imm8
///
/// "mr"-style instructions use the low bits of opcode to indicate shape of instruction:
///
@ -1634,12 +1636,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
///
/// opcode | operand shape
/// -------+-------------------------
/// mr + 0 | r/m8, r8
/// mr + 1 | r/m16/32/64, r16/32/64
/// mr + 2 | r8, r/m8
/// mr + 3 | r16/32/64, r/m16/32/64
/// mr + 4 | AL, imm8
/// mr + 5 | rAX, imm16/32
/// mr + 0 | *r/m8*, r8
/// mr + 1 | *r/m16/32/64*, r16/32/64
/// mr + 2 | *r8*, r/m8
/// mr + 3 | *r16/32/64*, r/m16/32/64
/// mr + 4 | *AL*, imm8
/// mr + 5 | *rAX*, imm16/32
///
/// TODO: rotates and shifts share the same structure, so we can potentially implement them
/// at a later date with very similar code.
@ -1656,12 +1658,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
///
/// opcode | operand shape
/// --------+------------------
/// c0 /opx | r/m8, imm8
/// c1 /opx | r/m16/32/64, imm8
/// d0 /opx | r/m8, 1
/// d1 /opx | r/m16/32/64, 1
/// d2 /opx | r/m8, CL (for context, CL is register 1)
/// d3 /opx | r/m16/32/64, CL (for context, CL is register 1)
/// c0 /opx | *r/m8*, imm8
/// c1 /opx | *r/m16/32/64*, imm8
/// d0 /opx | *r/m8*, 1
/// d1 /opx | *r/m16/32/64*, 1
/// d2 /opx | *r/m8*, CL (for context, CL is register 1)
/// d3 /opx | *r/m16/32/64*, CL (for context, CL is register 1)
fn genX8664BinMathCode(
self: *Self,
src: LazySrcLoc,
@ -1687,77 +1689,84 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.ptr_stack_offset => unreachable,
.ptr_embedded_in_code => unreachable,
.register => |src_reg| {
// register, register use mr + 1 addressing mode: r/m16/32/64, r16/32/64
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = mr + 1,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
),
.reg = src_reg,
// for register, register use mr + 1
// addressing mode: *r/m16/32/64*, r16/32/64
const operand_size = dst_ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 3);
encoder.rex(.{
.w = operand_size == 64,
.r = src_reg.isExtended(),
.b = dst_reg.isExtended(),
});
encoder.opcode_1byte(mr + 1);
encoder.modRm_direct(
src_reg.low_id(),
dst_reg.low_id(),
);
},
.immediate => |imm| {
// register, immediate use opx = 81 or 83 addressing modes:
// opx = 81: r/m16/32/64, imm16/32
// opx = 83: r/m16/32/64, imm8
const imm32 = @intCast(u31, imm); // This case must be handled before calling genX8664BinMathCode.
if (imm32 <= math.maxInt(u7)) {
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x83,
.opcode_extension = opx,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
),
.immediate_bytes = 1,
.immediate = imm32,
const imm32 = @intCast(i32, imm); // This case must be handled before calling genX8664BinMathCode.
if (imm32 <= math.maxInt(i8)) {
const operand_size = dst_ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 4);
encoder.rex(.{
.w = operand_size == 64,
.b = dst_reg.isExtended(),
});
encoder.opcode_1byte(0x83);
encoder.modRm_direct(
opx,
dst_reg.low_id(),
);
encoder.imm8(@intCast(i8, imm32));
} else {
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x81,
.opcode_extension = opx,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
),
.immediate_bytes = 4,
.immediate = imm32,
const operand_size = dst_ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 7);
encoder.rex(.{
.w = operand_size == 64,
.b = dst_reg.isExtended(),
});
encoder.opcode_1byte(0x81);
encoder.modRm_direct(
opx,
dst_reg.low_id(),
);
encoder.imm32(@intCast(i32, imm32));
}
},
.embedded_in_code, .memory => {
return self.fail(src, "TODO implement x86 ADD/SUB/CMP source memory", .{});
},
.stack_offset => |off| {
// register, indirect use mr + 3
// addressing mode: *r16/32/64*, r/m16/32/64
const abi_size = dst_ty.abiSize(self.target.*);
const adj_off = off + abi_size;
if (off > math.maxInt(i32)) {
return self.fail(src, "stack offset too large", .{});
}
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = abi_size == 64,
.primary_opcode_1b = mr + 0x3,
.reg = dst_reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .mem_disp = .{
.reg = Register.ebp,
.disp = -@intCast(i32, adj_off),
} },
),
const encoder = try X8664Encoder.init(self.code, 7);
encoder.rex(.{
.w = abi_size == 64,
.r = dst_reg.isExtended(),
});
encoder.opcode_1byte(mr + 3);
if (adj_off <= std.math.maxInt(i8)) {
encoder.modRm_indirectDisp8(
dst_reg.low_id(),
Register.ebp.low_id(),
);
encoder.disp8(-@intCast(i8, adj_off));
} else {
encoder.modRm_indirectDisp32(
dst_reg.low_id(),
Register.ebp.low_id(),
);
encoder.disp32(-@intCast(i32, adj_off));
}
},
.compare_flags_unsigned => {
return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{});
@ -1825,17 +1834,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
//
// Use the following imul opcode
// 0F AF /r: IMUL r32/64, r/m32/64
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_2b = 0xaf,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = src_reg },
),
.reg = dst_reg,
const abi_size = dst_ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 4);
encoder.rex(.{
.w = abi_size == 64,
.r = dst_reg.isExtended(),
.b = src_reg.isExtended(),
});
encoder.opcode_2byte(0x0f, 0xaf);
encoder.modRm_direct(
dst_reg.low_id(),
src_reg.low_id(),
);
},
.immediate => |imm| {
// register, immediate:
@ -1853,33 +1863,33 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// 2) perform register,register mul
// 0F AF /r: IMUL r32/64, r/m32/64
if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x6B,
.reg = dst_reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
),
.immediate_bytes = 1,
.immediate = imm,
const abi_size = dst_ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 4);
encoder.rex(.{
.w = abi_size == 64,
.r = dst_reg.isExtended(),
.b = dst_reg.isExtended(),
});
encoder.opcode_1byte(0x6B);
encoder.modRm_direct(
dst_reg.low_id(),
dst_reg.low_id(),
);
encoder.imm8(@intCast(i8, imm));
} else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x69,
.reg = dst_reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
),
.immediate_bytes = 4,
.immediate = imm,
const abi_size = dst_ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 7);
encoder.rex(.{
.w = abi_size == 64,
.r = dst_reg.isExtended(),
.b = dst_reg.isExtended(),
});
encoder.opcode_1byte(0x69);
encoder.modRm_direct(
dst_reg.low_id(),
dst_reg.low_id(),
);
encoder.imm32(@intCast(i32, imm));
} else {
const src_reg = try self.copyToTmpRegister(src, dst_ty, src_mcv);
return self.genX8664Imul(src, dst_ty, dst_mcv, MCValue{ .register = src_reg });
@ -1910,17 +1920,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// register, register
// Use the following imul opcode
// 0F AF /r: IMUL r32/64, r/m32/64
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_2b = 0xaf,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = src_reg },
),
.reg = dst_reg,
const abi_size = dst_ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 4);
encoder.rex(.{
.w = abi_size == 64,
.r = dst_reg.isExtended(),
.b = src_reg.isExtended(),
});
encoder.opcode_2byte(0x0f, 0xaf);
encoder.modRm_direct(
dst_reg.low_id(),
src_reg.low_id(),
);
// copy dst_reg back out
return self.genSetStack(src, dst_ty, off, MCValue{ .register = dst_reg });
},
@ -1950,20 +1961,29 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (off > math.maxInt(i32)) {
return self.fail(src, "stack offset too large", .{});
}
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = abi_size == 64,
.primary_opcode_1b = opcode,
.reg = reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .mem_disp = .{
.reg = Register.ebp,
.disp = -@intCast(i32, adj_off),
} },
),
const i_adj_off = -@intCast(i32, adj_off);
const encoder = try X8664Encoder.init(self.code, 7);
encoder.rex(.{
.w = abi_size == 64,
.r = reg.isExtended(),
});
encoder.opcode_1byte(opcode);
if (i_adj_off < std.math.maxInt(i8)) {
// example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx
encoder.modRm_indirectDisp8(
reg.low_id(),
Register.ebp.low_id(),
);
encoder.disp8(@intCast(i8, i_adj_off));
} else {
// example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx
encoder.modRm_indirectDisp32(
reg.low_id(),
Register.ebp.low_id(),
);
encoder.disp32(i_adj_off);
}
}
fn genArgDbgInfo(self: *Self, inst: *ir.Inst.Arg, mcv: MCValue) !void {
@ -2630,25 +2650,20 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
},
.register => |reg| blk: {
// test reg, 1
try self.encodeX8664Instruction(inst.base.src, Instruction{
// TODO detect al, ax, eax
const encoder = try X8664Encoder.init(self.code, 4);
encoder.rex(.{
// TODO audit this codegen: we force w = true here to make
// the value affect the big register
.operand_size_64 = true,
.primary_opcode_1b = 0xf6, // f6/0 is TEST r/m8, imm8
.opcode_extension = 0,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
// TODO detect al, ax, eax, there's another opcode 0xa8 for that
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = reg },
),
.immediate_bytes = 1,
.immediate = 1,
.w = true,
.b = reg.isExtended(),
});
encoder.opcode_1byte(0xf6);
encoder.modRm_direct(
0,
reg.low_id(),
);
encoder.disp8(1);
break :blk 0x84;
},
else => return self.fail(inst.base.src, "TODO implement condbr {s} when condition is {s}", .{ self.target.cpu.arch, @tagName(cond) }),
@ -3170,39 +3185,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
}
/// Builds a REX prefix from the requested flags and appends it to the
/// instruction stream. Nothing is emitted when every flag is false, which
/// has a few implications:
///
/// * The instruction buffer only grows when the resulting REX is
///   meaningful; otherwise its length is left untouched.
/// * A deliberately "meaningless REX" (a bare 0x40) cannot be produced by
///   this function and must be appended explicitly.
///
/// Capacity for the prefix byte is assumed, not ensured, by this function.
///
/// W => 64 bit mode
/// R => extension to the MODRM.reg field
/// X => extension to the SIB.index field
/// B => extension to the MODRM.rm field or the SIB.base field
fn rex(self: *Self, arg: struct { b: bool = false, w: bool = false, x: bool = false, r: bool = false }) void {
    comptime assert(arch == .x86_64);

    // From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB.
    const w_bit: u8 = if (arg.w) 0x8 else 0x0;
    const r_bit: u8 = if (arg.r) 0x4 else 0x0;
    const x_bit: u8 = if (arg.x) 0x2 else 0x0;
    const b_bit: u8 = if (arg.b) 0x1 else 0x0;
    const flags = w_bit | r_bit | x_bit | b_bit;

    // With no flag set the prefix would be the bare 0x40; skip it.
    if (flags == 0) return;

    self.code.appendAssumeCapacity(0x40 | flags);
}
/// Sets the value without any modifications to register allocation metadata or stack allocation metadata.
fn setRegOrMem(self: *Self, src: LazySrcLoc, ty: Type, loc: MCValue, val: MCValue) !void {
switch (loc) {
@ -3750,27 +3732,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
},
.compare_flags_unsigned => |op| {
try self.encodeX8664Instruction(src, Instruction{
// TODO audit this codegen: we force w = true here to make
// the value affect the big register
.operand_size_64 = true,
.primary_opcode_2b = switch (op) {
.gte => 0x93,
.gt => 0x97,
.neq => 0x95,
.lt => 0x92,
.lte => 0x96,
.eq => 0x94,
},
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = reg },
),
const encoder = try X8664Encoder.init(self.code, 7);
// TODO audit this codegen: we force w = true here to make
// the value affect the big register
encoder.rex(.{
.w = true,
.b = reg.isExtended(),
});
encoder.opcode_2byte(0x0f, switch (op) {
.gte => 0x93,
.gt => 0x97,
.neq => 0x95,
.lt => 0x92,
.lte => 0x96,
.eq => 0x94,
});
encoder.modRm_direct(
0,
reg.low_id(),
);
},
.compare_flags_signed => |op| {
return self.fail(src, "TODO set register with compare flags value (signed)", .{});
@ -3780,34 +3760,43 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// register is the fastest way to zero a register.
if (x == 0) {
// The encoding for `xor r32, r32` is `0x31 /r`.
const encoder = try X8664Encoder.init(self.code, 3);
// If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since
// this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB.
// Both R and B are set, as we're extending, in effect, the register bits *and* the operand.
encoder.rex(.{
.r = reg.isExtended(),
.b = reg.isExtended(),
});
encoder.opcode_1byte(0x31);
// Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the
// ModR/M byte of the instruction contains a register operand and an r/m operand."
try self.encodeX8664Instruction(src, Instruction{
.primary_opcode_1b = 0x31,
encoder.modRm_direct(
reg.low_id(),
reg.low_id(),
);
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.reg = @as(?Register, reg),
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = reg },
),
});
return;
}
if (x <= math.maxInt(u32)) {
if (x <= math.maxInt(i32)) {
// Next best case: if we set the lower four bytes, the upper four will be zeroed.
//
// The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM.
try self.encodeX8664Instruction(src, Instruction{
// B8 + R
.primary_opcode_1b = 0xB8,
.opcode_reg = @as(?Register, reg),
// IMM32
.immediate_bytes = 4,
.immediate = x,
const encoder = try X8664Encoder.init(self.code, 6);
// Just as with XORing, we need a REX prefix. This time though, we only
// need the B bit set, as we're extending the opcode's register field,
// and there is no Mod R/M byte.
encoder.rex(.{
.b = reg.isExtended(),
});
encoder.opcode_withReg(0xB8, reg.low_id());
// no ModR/M byte
// IMM
encoder.imm32(@intCast(i32, x));
return;
}
// Worst case: we need to load the 64-bit register with the IMM. GNU's assembler calls
@ -3817,37 +3806,40 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// This encoding is, in fact, the *same* as the one used for 32-bit loads. The only
// difference is that we set REX.W before the instruction, which extends the load to
// 64-bit and uses the full bit-width of the register.
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = true,
// B8 + R
.primary_opcode_1b = 0xB8,
.opcode_reg = @as(?Register, reg),
// IMM64
.immediate_bytes = 8,
.immediate = x,
});
{
const encoder = try X8664Encoder.init(self.code, 10);
encoder.rex(.{
.w = true,
.b = reg.isExtended(),
});
encoder.opcode_withReg(0xB8, reg.low_id());
encoder.imm64(x);
}
},
.embedded_in_code => |code_offset| {
// We need the offset from RIP in a signed i32 twos complement.
// The instruction is 7 bytes long and RIP points to the next instruction.
// 64-bit LEA is encoded as REX.W 8D /r.
const rip = self.code.items.len;
const rip = self.code.items.len + 7;
const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
const offset = @intCast(i32, big_offset);
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = true,
const encoder = try X8664Encoder.init(self.code, 7);
// LEA
.primary_opcode_1b = 0x8D,
.reg = reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .disp32 = @bitCast(i32, offset) },
),
// byte 1, always exists because w = true
encoder.rex(.{
.w = true,
.r = reg.isExtended(),
});
// byte 2
encoder.opcode_1byte(0x8D);
// byte 3
encoder.modRm_RIPDisp32(reg.low_id());
// byte 4-7
encoder.disp32(offset);
// Double check that we haven't done any math errors
assert(rip == self.code.items.len);
},
.register => |src_reg| {
// If the registers are the same, nothing to do.
@ -3855,20 +3847,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return;
// This is a variant of 8B /r.
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x8B,
.reg = reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = src_reg },
),
const abi_size = ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 3);
encoder.rex(.{
.w = abi_size == 64,
.r = reg.isExtended(),
.b = src_reg.isExtended(),
});
encoder.opcode_1byte(0x8B);
encoder.modRm_direct(reg.low_id(), src_reg.low_id());
},
.memory => |x| {
if (self.bin_file.options.pie) {
@ -3886,32 +3873,28 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{});
}
const abi_size = ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 7);
// LEA reg, [<offset>]
// manually do this instruction to make sure the offset into the disp32 field won't change.
try self.code.ensureCapacity(self.code.items.len + 7);
self.rex(.{ .w = ty.abiSize(self.target.*) == 64, .r = reg.isExtended() });
self.code.appendSliceAssumeCapacity(&[_]u8{
0x8D,
0x05 | (@as(u8, reg.id() & 0b111) << 3),
// TODO: Check if this breaks on macho if abi_size != 64 and reg is not extended
// this causes rex byte to be omitted, which might mean the offset (+3) above is wrong.
encoder.rex(.{
.w = abi_size == 64,
.r = reg.isExtended(),
});
mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), 0);
encoder.opcode_1byte(0x8D);
encoder.modRm_RIPDisp32(reg.low_id());
encoder.disp32(0);
// MOV reg, [reg]
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x8B,
.reg = reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .mem = reg },
),
encoder.rex(.{
.w = abi_size == 64,
.r = reg.isExtended(),
.b = reg.isExtended(),
});
} else if (x <= math.maxInt(u32)) {
encoder.opcode_1byte(0x8B);
encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
} else if (x <= math.maxInt(i32)) {
// Moving from memory to a register is a variant of `8B /r`.
// Since we're using 64-bit moves, we require a REX.
// This variant also requires a SIB, as it would otherwise be RIP-relative.
@ -3919,14 +3902,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// The SIB must be 0x25, to indicate a disp32 with no scaled index.
// 0b00RRR100, where RRR is the lower three bits of the register ID.
// The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
try self.code.ensureCapacity(self.code.items.len + 8);
self.rex(.{ .w = ty.abiSize(self.target.*) == 64, .r = reg.isExtended() });
self.code.appendSliceAssumeCapacity(&[_]u8{
0x8B,
0x04 | (@as(u8, reg.id() & 0b111) << 3), // R
0x25,
const abi_size = ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 8);
encoder.rex(.{
.w = abi_size == 64,
.r = reg.isExtended(),
});
mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, x));
encoder.opcode_1byte(0x8B);
// effective address = [SIB]
encoder.modRm_SIBDisp0(reg.low_id());
// SIB = disp32
encoder.sib_disp32();
encoder.disp32(@intCast(i32, x));
} else {
// If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load
// the value.
@ -3935,12 +3922,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// moffs64* is a 64-bit offset "relative to segment base", which really just means the
// absolute address for all practical purposes.
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = true,
.primary_opcode_1b = 0xa1,
.immediate_bytes = 8,
.immediate = x,
const encoder = try X8664Encoder.init(self.code, 10);
encoder.rex(.{
.w = true,
});
encoder.opcode_1byte(0xA1);
encoder.writeIntLittle(u64, x);
} else {
// This requires two instructions; a move imm as used above, followed by an indirect load using the register
// as the address and the register as the destination.
@ -3957,17 +3944,17 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// Now, the register contains the address of the value to load into it
// Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant.
// TODO: determine whether to allow other sized registers, and if so, handle them properly.
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x8B,
.reg = reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .mem = reg },
),
// mov reg, [reg]
const abi_size = ty.abiSize(self.target.*);
const encoder = try X8664Encoder.init(self.code, 3);
encoder.rex(.{
.w = abi_size == 64,
.r = reg.isExtended(),
.b = reg.isExtended(),
});
encoder.opcode_1byte(0x8B);
encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
}
}
},
@ -3978,20 +3965,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return self.fail(src, "stack offset too large", .{});
}
const ioff = -@intCast(i32, off);
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x8B,
.reg = reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .mem_disp = .{
.reg = Register.ebp,
.disp = ioff,
} },
),
const encoder = try X8664Encoder.init(self.code, 3);
encoder.rex(.{
.w = abi_size == 64,
.r = reg.isExtended(),
});
encoder.opcode_1byte(0x8B);
if (std.math.minInt(i8) <= ioff and ioff <= std.math.maxInt(i8)) {
// Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f]
encoder.modRm_indirectDisp8(reg.low_id(), Register.ebp.low_id());
encoder.disp8(@intCast(i8, ioff));
} else {
// Example: 48 8b 8d 80 00 00 00 mov rcx,QWORD PTR [rbp+0x80]
encoder.modRm_indirectDisp32(reg.low_id(), Register.ebp.low_id());
encoder.disp32(ioff);
}
},
},
else => return self.fail(src, "TODO implement getSetReg for {}", .{self.target.cpu.arch}),

View File

@ -3,6 +3,7 @@ const testing = std.testing;
const mem = std.mem;
const assert = std.debug.assert;
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;
const Type = @import("../Type.zig");
const DW = std.dwarf;
@ -145,51 +146,57 @@ pub const callee_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8
pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };
/// Represents an unencoded x86 instruction.
/// Encoding helper functions for x86_64 instructions
///
/// Roughly based on the table headings at http://ref.x86asm.net/coder64.html
pub const Instruction = struct {
/// Opcode prefix, needed for certain rare ops (e.g. MOVSS)
opcode_prefix: ?u8 = null,
/// Many of these helpers do very little, but they can help make things
/// slightly more readable with more descriptive field names / function names.
///
/// Some of them also have asserts to ensure that we aren't doing dumb things.
/// For example, trying to use register 4 (esp) in an indirect modr/m byte is illegal,
/// you need to encode it with an SIB byte.
///
/// Note that ALL of these helper functions will assume capacity,
/// so ensure that the `code` has sufficient capacity before using them.
/// The `init` method is the recommended way to ensure capacity.
pub const Encoder = struct {
/// Non-owning reference to the code array
code: *ArrayList(u8),
/// One-byte primary opcode
primary_opcode_1b: ?u8 = null,
/// Two-byte primary opcode (always prefixed with 0f)
primary_opcode_2b: ?u8 = null,
// TODO: Support 3-byte opcodes
const Self = @This();
/// Secondary opcode
secondary_opcode: ?u8 = null,
/// Wraps `code` in an Encoder so the helper functions can be called on it.
///
/// `maximum_inst_size` is the upper bound, in bytes, on what will be
/// written through the returned Encoder. That much spare capacity is
/// reserved here because the helper functions append with assumed
/// capacity and perform no bounds checking of their own.
///
/// Returns an error if reserving the capacity fails.
pub fn init(code: *ArrayList(u8), maximum_inst_size: u8) !Self {
    const required_capacity = code.items.len + maximum_inst_size;
    try code.ensureCapacity(required_capacity);
    return Self{ .code = code };
}
/// Opcode extension (to be placed in the ModR/M byte in place of reg)
opcode_extension: ?u3 = null,
/// Appends `value` to the code array as a big-endian integer of type `T`.
///
/// The bit width of `T` must be a multiple of 8 (enforced by `@divExact`).
/// Capacity for the written bytes is assumed, not ensured.
pub fn writeIntBig(self: Self, comptime T: type, value: T) void {
    const byte_count = @divExact(@typeInfo(T).Int.bits, 8);
    const dest = self.code.addManyAsArrayAssumeCapacity(byte_count);
    mem.writeIntBig(T, dest, value);
}
/// Legacy prefixes to use with this instruction
/// Most of the time, this field will be 0 and no prefixes are added.
/// Otherwise, a prefix will be added for each field set.
legacy_prefixes: LegacyPrefixes = .{},
/// Appends `value` to the code array as a little-endian integer of type `T`.
///
/// The bit width of `T` must be a multiple of 8 (enforced by `@divExact`).
/// Capacity for the written bytes is assumed, not ensured.
pub fn writeIntLittle(self: Self, comptime T: type, value: T) void {
    const byte_count = @divExact(@typeInfo(T).Int.bits, 8);
    const dest = self.code.addManyAsArrayAssumeCapacity(byte_count);
    mem.writeIntLittle(T, dest, value);
}
/// 64-bit operand size
operand_size_64: bool = false,
// --------
// Prefixes
// --------
/// The opcode-reg field,
/// stored in the 3 least significant bits of the opcode
/// on certain instructions + REX if extended
opcode_reg: ?Register = null,
/// The reg field
reg: ?Register = null,
/// The mod + r/m field
modrm: ?ModrmEffectiveAddress = null,
/// Location of the 3rd operand, if applicable
sib: ?SibEffectiveAddress = null,
/// Number of bytes of immediate
immediate_bytes: u8 = 0,
/// The value of the immediate
immediate: u64 = 0,
/// See legacy_prefixes
pub const LegacyPrefixes = packed struct {
/// LOCK
prefix_f0: bool = false,
@ -212,322 +219,391 @@ pub const Instruction = struct {
/// Branch taken
prefix_3e: bool = false,
/// Operand size override
/// Operand size override (enables 16 bit operation)
prefix_66: bool = false,
/// Address size override
/// Address size override (enables 16 bit address size)
prefix_67: bool = false,
padding: u5 = 0,
};
/// Encodes an effective address for the Mod + R/M part of the ModR/M byte
///
/// Note that depending on the instruction, not all effective addresses are allowed.
///
/// Examples:
/// eax: .reg = .eax
/// [eax]: .mem = .eax
/// [eax + 8]: .mem_disp = .{ .reg = .eax, .disp = 8 }
/// [eax - 8]: .mem_disp = .{ .reg = .eax, .disp = -8 }
/// [55]: .disp32 = 55
pub const ModrmEffectiveAddress = union(enum) {
reg: Register,
mem: Register,
mem_disp: struct {
reg: Register,
disp: i32,
},
disp32: u32,
pub fn isExtended(self: @This()) bool {
return switch (self) {
.reg => |reg| reg.isExtended(),
.mem => |memea| memea.isExtended(),
.mem_disp => |mem_disp| mem_disp.reg.isExtended(),
.disp32 => false,
};
}
};
/// Encodes an effective address for the SIB byte
///
/// Note that depending on the instruction, not all effective addresses are allowed.
///
/// Examples:
/// [eax + ebx * 2]: .base_index = .{ .base = .eax, .index = .ebx, .scale = 2 }
/// [eax]: .base_index = .{ .base = .eax, .index = null, .scale = 1 }
/// [ebx * 2 + 256]: .index_disp = .{ .index = .ebx, .scale = 2, .disp = 256 }
/// [[ebp] + ebx * 2 + 8]: .ebp_index_disp = .{ .index = .ebx, .scale = 2, .disp = 8 }
pub const SibEffectiveAddress = union(enum) {
base_index: struct {
base: Register,
index: ?Register,
scale: u8, // 1, 2, 4, or 8
},
index_disp: struct {
index: ?Register,
scale: u8, // 1, 2, 4, or 8
disp: u32,
},
ebp_index_disp: struct {
index: ?Register,
scale: u8, // 1, 2, 4, or 8
disp: u32,
},
pub fn baseIsExtended(self: @This()) bool {
return switch (self) {
.base_index => |base_index| base_index.base.isExtended(),
.index_disp, .ebp_index_disp => false,
};
}
pub fn indexIsExtended(self: @This()) bool {
return switch (self) {
.base_index => |base_index| if (base_index.index) |idx| idx.isExtended() else false,
.index_disp => |index_disp| if (index_disp.index) |idx| idx.isExtended() else false,
.ebp_index_disp => |ebp_index_disp| if (ebp_index_disp.index) |idx| idx.isExtended() else false,
};
}
};
/// Writes the encoded Instruction to the code ArrayList
pub fn encodeInto(inst: Instruction, code: *ArrayList(u8)) !void {
// We need to write the following, in that order:
// - Legacy prefixes (0 to 13 bytes)
// - REX prefix (0 to 1 byte)
// - Opcode (1, 2, or 3 bytes)
// - ModR/M (0 or 1 byte)
// - SIB (0 or 1 byte)
// - Displacement (0, 1, 2, or 4 bytes)
// - Immediate (0, 1, 2, 4, or 8 bytes)
// By this calculation, an instruction could be up to 31 bytes long (will probably not happen)
try code.ensureCapacity(code.items.len + 31);
// Legacy prefixes
if (@bitCast(u16, inst.legacy_prefixes) != 0) {
/// Encodes legacy prefixes
///
/// Appends one byte for every flag that is set in `prefixes`, in the fixed order
/// written below. Nothing is appended when no flag is set.
/// Bytes are appended with `appendAssumeCapacity`, so the caller is expected to
/// have reserved enough capacity beforehand.
pub fn legacyPrefixes(self: Self, prefixes: LegacyPrefixes) void {
if (@bitCast(u16, prefixes) != 0) {
// Hopefully this path isn't taken very often, so we'll do it the slow way for now
// LOCK
if (inst.legacy_prefixes.prefix_f0) code.appendAssumeCapacity(0xf0);
if (prefixes.prefix_f0) self.code.appendAssumeCapacity(0xf0);
// REPNE/REPNZ, Scalar Double-precision
if (inst.legacy_prefixes.prefix_f2) code.appendAssumeCapacity(0xf2);
if (prefixes.prefix_f2) self.code.appendAssumeCapacity(0xf2);
// REP, REPE/REPZ, Scalar Single-precision
if (inst.legacy_prefixes.prefix_f3) code.appendAssumeCapacity(0xf3);
if (prefixes.prefix_f3) self.code.appendAssumeCapacity(0xf3);
// CS segment override or Branch not taken
if (inst.legacy_prefixes.prefix_2e) code.appendAssumeCapacity(0x2e);
if (prefixes.prefix_2e) self.code.appendAssumeCapacity(0x2e);
// SS segment override
if (inst.legacy_prefixes.prefix_36) code.appendAssumeCapacity(0x36);
if (prefixes.prefix_36) self.code.appendAssumeCapacity(0x36);
// ES segment override
if (inst.legacy_prefixes.prefix_26) code.appendAssumeCapacity(0x26);
if (prefixes.prefix_26) self.code.appendAssumeCapacity(0x26);
// FS segment override
if (inst.legacy_prefixes.prefix_64) code.appendAssumeCapacity(0x64);
if (prefixes.prefix_64) self.code.appendAssumeCapacity(0x64);
// GS segment override
if (inst.legacy_prefixes.prefix_65) code.appendAssumeCapacity(0x65);
if (prefixes.prefix_65) self.code.appendAssumeCapacity(0x65);
// DS segment override or Branch taken
if (inst.legacy_prefixes.prefix_3e) code.appendAssumeCapacity(0x3e);
if (prefixes.prefix_3e) self.code.appendAssumeCapacity(0x3e);
// Operand size override
if (inst.legacy_prefixes.prefix_66) code.appendAssumeCapacity(0x66);
if (prefixes.prefix_66) self.code.appendAssumeCapacity(0x66);
// Address size override
if (inst.legacy_prefixes.prefix_67) code.appendAssumeCapacity(0x67);
if (prefixes.prefix_67) self.code.appendAssumeCapacity(0x67);
}
}
// REX prefix
//
// A REX prefix has the following form:
// 0b0100_WRXB
// 0100: fixed bits
// W: stands for "wide", indicates that the instruction uses 64-bit operands.
// R, X, and B each contain the 4th bit of a register
// these have to be set when using registers 8-15.
// R: stands for "reg", extends the reg field in the ModR/M byte.
// X: stands for "index", extends the index field in the SIB byte.
// B: stands for "base", extends either the r/m field in the ModR/M byte,
// the base field in the SIB byte,
// or the opcode reg field in the Opcode byte.
{
var value: u8 = 0x40;
if (inst.opcode_reg) |opcode_reg| {
if (opcode_reg.isExtended()) {
value |= 0x1;
}
}
if (inst.modrm) |modrm| {
if (modrm.isExtended()) {
value |= 0x1;
}
}
if (inst.sib) |sib| {
if (sib.baseIsExtended()) {
value |= 0x1;
}
if (sib.indexIsExtended()) {
value |= 0x2;
}
}
if (inst.reg) |reg| {
if (reg.isExtended()) {
value |= 0x4;
}
}
if (inst.operand_size_64) {
value |= 0x8;
}
if (value != 0x40) {
code.appendAssumeCapacity(value);
}
/// Emits the operand-size override prefix (0x66) to select 16 bit operands.
///
/// If a REX prefix with W set is also present, REX.W takes precedence over this prefix.
pub fn prefix16BitMode(self: Self) void {
    const operand_size_override: u8 = 0x66;
    self.code.appendAssumeCapacity(operand_size_override);
}
/// The four flag bits of a REX prefix.
///
/// From section 2.2.1.2 of the manual, REX is encoded as 0b0100_WRXB.
/// Every field defaults to false.
pub const Rex = struct {
/// W: wide, enables 64-bit operation
w: bool = false,
/// R: extends the reg field in the ModR/M byte
r: bool = false,
/// X: extends the index field in the SIB byte
x: bool = false,
/// B: extends the r/m field in the ModR/M byte,
/// or the base field in the SIB byte,
/// or the reg field in the Opcode byte
b: bool = false,
};
/// Emits a REX prefix byte (0b0100_WRXB) assembled from the flags in `byte`.
///
/// A REX prefix is needed for 64 bit operation, or whenever one of
/// reg, index, r/m, base, or opcode-reg may be an extended register.
///
/// See struct `Rex` for the meaning of each flag.
///
/// When every flag is false, no byte is emitted at all.
pub fn rex(self: Self, byte: Rex) void {
    const w_bit: u8 = if (byte.w) 0b1000 else 0;
    const r_bit: u8 = if (byte.r) 0b0100 else 0;
    const x_bit: u8 = if (byte.x) 0b0010 else 0;
    const b_bit: u8 = if (byte.b) 0b0001 else 0;
    const flags = w_bit | r_bit | x_bit | b_bit;

    // A REX byte that carries no flags is left out entirely.
    if (flags == 0) return;

    self.code.appendAssumeCapacity(0b0100_0000 | flags);
}
// Opcode
if (inst.primary_opcode_1b) |opcode| {
var value = opcode;
if (inst.opcode_reg) |opcode_reg| {
value |= opcode_reg.low_id();
}
code.appendAssumeCapacity(value);
} else if (inst.primary_opcode_2b) |opcode| {
code.appendAssumeCapacity(0x0f);
var value = opcode;
if (inst.opcode_reg) |opcode_reg| {
value |= opcode_reg.low_id();
}
code.appendAssumeCapacity(value);
}
// ------
// Opcode
// ------
var disp8: ?u8 = null;
var disp16: ?u16 = null;
var disp32: ?u32 = null;
/// Emits a single-byte opcode, unmodified.
pub fn opcode_1byte(self: Self, opcode: u8) void {
    const byte: u8 = opcode;
    self.code.appendAssumeCapacity(byte);
}
// ModR/M
//
// Example ModR/M byte:
// c7: ModR/M byte that contains:
// 11 000 111:
// ^ ^ ^
// mod | |
// reg |
// r/m
// where mod = 11 indicates that both operands are registers,
// reg = 000 indicates that the first operand is register EAX
// r/m = 111 indicates that the second operand is register EDI (since mod = 11)
if (inst.modrm != null or inst.reg != null or inst.opcode_extension != null) {
var value: u8 = 0;
/// Emits a two-byte opcode: `prefix` first, then `opcode`.
///
/// For example IMUL is encoded as 0x0f 0xaf, so it is emitted with
///
/// encoder.opcode_2byte(0x0f, 0xaf);
pub fn opcode_2byte(self: Self, prefix: u8, opcode: u8) void {
    const bytes = [2]u8{ prefix, opcode };
    for (bytes) |byte| {
        self.code.appendAssumeCapacity(byte);
    }
}
// mod + rm
if (inst.modrm) |modrm| {
switch (modrm) {
.reg => |reg| {
value |= reg.low_id();
value |= 0b11_000_000;
},
.mem => |memea| {
assert(memea.low_id() != 4 and memea.low_id() != 5);
value |= memea.low_id();
// value |= 0b00_000_000;
},
.mem_disp => |mem_disp| {
assert(mem_disp.reg.low_id() != 4);
value |= mem_disp.reg.low_id();
if (mem_disp.disp < 128) {
// Use 1 byte of displacement
value |= 0b01_000_000;
disp8 = @bitCast(u8, @intCast(i8, mem_disp.disp));
} else {
// Use all 4 bytes of displacement
value |= 0b10_000_000;
disp32 = @bitCast(u32, mem_disp.disp);
}
},
.disp32 => |d| {
value |= 0b00_000_101;
disp32 = d;
},
}
}
/// Emits a single-byte opcode whose low three bits hold a register id.
///
/// `opcode` must have its low three bits clear; `reg` is OR-ed into them.
/// The caller must emit a REX prefix beforehand if the register is extended.
pub fn opcode_withReg(self: Self, opcode: u8, reg: u3) void {
    const low_bits = opcode & 0b111;
    assert(low_bits == 0);

    const encoded = opcode | reg;
    self.code.appendAssumeCapacity(encoded);
}
// reg
if (inst.reg) |reg| {
value |= @as(u8, reg.low_id()) << 3;
} else if (inst.opcode_extension) |ext| {
value |= @as(u8, ext) << 3;
}
// ------
// ModR/M
// ------
code.appendAssumeCapacity(value);
}
/// Emits a ModR/M byte built from its three raw fields.
///
/// Layout: `mod` in bits 7-6, `reg_or_opx` in bits 5-3, `rm` in bits 2-0.
/// The caller must emit a REX prefix beforehand if reg or rm are extended.
pub fn modRm(self: Self, mod: u2, reg_or_opx: u3, rm: u3) void {
    const mod_bits = @as(u8, mod) << 6;
    const reg_bits = @as(u8, reg_or_opx) << 3;
    const rm_bits: u8 = rm;
    self.code.appendAssumeCapacity(mod_bits | reg_bits | rm_bits);
}
// SIB
{
if (inst.sib) |sib| {
return error.TODOSIBByteForX8664;
}
}
/// Emits a ModR/M byte for direct (register) addressing, i.e. mod = 0b11.
/// r/m effective address: r/m
///
/// The reg operand is always just the register itself in a ModR/M byte.
/// The caller must emit a REX prefix beforehand if reg or rm are extended.
pub fn modRm_direct(self: Self, reg_or_opx: u3, rm: u3) void {
    const mod_direct: u2 = 0b11;
    self.modRm(mod_direct, reg_or_opx, rm);
}
// Displacement
//
// The size of the displacement depends on the instruction used and is very fragile.
// The bytes are simply written in LE order.
{
/// Emits a ModR/M byte for indirect addressing without displacement (mod = 0b00).
/// r/m effective address: [r/m]
///
/// `rm` must not be 4 or 5: with mod = 0b00 those values are emitted by
/// `modRm_SIBDisp0` and `modRm_RIPDisp32` respectively.
/// The reg operand is always just the register itself in a ModR/M byte.
/// The caller must emit a REX prefix beforehand if reg or rm are extended.
pub fn modRm_indirectDisp0(self: Self, reg_or_opx: u3, rm: u3) void {
    const is_reserved_rm = (rm == 4) or (rm == 5);
    assert(!is_reserved_rm);

    const mod_indirect: u2 = 0b00;
    self.modRm(mod_indirect, reg_or_opx, rm);
}
// These writes won't fail because we ensured capacity earlier.
if (disp8) |d|
code.appendAssumeCapacity(d)
else if (disp16) |d|
mem.writeIntLittle(u16, code.addManyAsArrayAssumeCapacity(2), d)
else if (disp32) |d|
mem.writeIntLittle(u32, code.addManyAsArrayAssumeCapacity(4), d);
}
/// Emits a ModR/M byte announcing a SIB byte with no displacement
/// (mod = 0b00, rm = 0b100).
/// r/m effective address: [SIB]
///
/// The reg operand is always just the register itself in a ModR/M byte.
/// The caller must emit a REX prefix beforehand if reg is extended.
pub fn modRm_SIBDisp0(self: Self, reg_or_opx: u3) void {
    const mod_indirect: u2 = 0b00;
    const rm_sib_follows: u3 = 0b100;
    self.modRm(mod_indirect, reg_or_opx, rm_sib_follows);
}
// Immediate
//
// The size of the immediate depends on the instruction used and is very fragile.
// The bytes are simply written in LE order.
{
// These writes won't fail because we ensured capacity earlier.
if (inst.immediate_bytes == 1)
code.appendAssumeCapacity(@intCast(u8, inst.immediate))
else if (inst.immediate_bytes == 2)
mem.writeIntLittle(u16, code.addManyAsArrayAssumeCapacity(2), @intCast(u16, inst.immediate))
else if (inst.immediate_bytes == 4)
mem.writeIntLittle(u32, code.addManyAsArrayAssumeCapacity(4), @intCast(u32, inst.immediate))
else if (inst.immediate_bytes == 8)
mem.writeIntLittle(u64, code.addManyAsArrayAssumeCapacity(8), inst.immediate);
}
/// Emits a ModR/M byte for RIP-relative addressing (mod = 0b00, rm = 0b101).
/// r/m effective address: [RIP + disp32]
///
/// The reg operand is always just the register itself in a ModR/M byte.
/// The caller must emit a REX prefix beforehand if reg is extended.
pub fn modRm_RIPDisp32(self: Self, reg_or_opx: u3) void {
    const mod_indirect: u2 = 0b00;
    const rm_rip_relative: u3 = 0b101;
    self.modRm(mod_indirect, reg_or_opx, rm_rip_relative);
}
/// Emits a ModR/M byte for indirect addressing with an 8 bit displacement (mod = 0b01).
/// r/m effective address: [r/m + disp8]
///
/// `rm` must not be 4: that value is emitted by `modRm_SIBDisp8`.
/// The reg operand is always just the register itself in a ModR/M byte.
/// The caller must emit a REX prefix beforehand if reg or rm are extended.
pub fn modRm_indirectDisp8(self: Self, reg_or_opx: u3, rm: u3) void {
    const rm_sib_follows: u3 = 0b100;
    assert(rm != rm_sib_follows);

    const mod_disp8: u2 = 0b01;
    self.modRm(mod_disp8, reg_or_opx, rm);
}
/// Emits a ModR/M byte announcing a SIB byte followed by an 8 bit displacement
/// (mod = 0b01, rm = 0b100).
/// r/m effective address: [SIB + disp8]
///
/// The reg operand is always just the register itself in a ModR/M byte.
/// The caller must emit a REX prefix beforehand if reg is extended.
pub fn modRm_SIBDisp8(self: Self, reg_or_opx: u3) void {
    const mod_disp8: u2 = 0b01;
    const rm_sib_follows: u3 = 0b100;
    self.modRm(mod_disp8, reg_or_opx, rm_sib_follows);
}
/// Emits a ModR/M byte for indirect addressing with a 32 bit displacement (mod = 0b10).
/// r/m effective address: [r/m + disp32]
///
/// `rm` must not be 4: that value is emitted by `modRm_SIBDisp32`.
/// The reg operand is always just the register itself in a ModR/M byte.
/// The caller must emit a REX prefix beforehand if reg or rm are extended.
pub fn modRm_indirectDisp32(self: Self, reg_or_opx: u3, rm: u3) void {
    const rm_sib_follows: u3 = 0b100;
    assert(rm != rm_sib_follows);

    const mod_disp32: u2 = 0b10;
    self.modRm(mod_disp32, reg_or_opx, rm);
}
/// Emits a ModR/M byte announcing a SIB byte followed by a 32 bit displacement
/// (mod = 0b10, rm = 0b100).
/// r/m effective address: [SIB + disp32]
///
/// The reg operand is always just the register itself in a ModR/M byte.
/// The caller must emit a REX prefix beforehand if reg is extended.
pub fn modRm_SIBDisp32(self: Self, reg_or_opx: u3) void {
    const mod_disp32: u2 = 0b10;
    const rm_sib_follows: u3 = 0b100;
    self.modRm(mod_disp32, reg_or_opx, rm_sib_follows);
}
// ---
// SIB
// ---
/// Emits a SIB byte built from its three raw fields.
///
/// Layout: `scale` in bits 7-6, `index` in bits 5-3, `base` in bits 2-0.
/// The caller must emit a REX prefix beforehand if index or base are extended.
pub fn sib(self: Self, scale: u2, index: u3, base: u3) void {
    const scale_bits = @as(u8, scale) << 6;
    const index_bits = @as(u8, index) << 3;
    const base_bits: u8 = base;
    self.code.appendAssumeCapacity(scale_bits | index_bits | base_bits);
}
/// Emits a SIB byte for a plain scaled-index-plus-base address.
/// r/m effective address: [base + scale * index]
///
/// `base` must not be 5, since with mod == 0 that value means "no base".
/// The caller must emit a REX prefix beforehand if index or base are extended.
pub fn sib_scaleIndexBase(self: Self, scale: u2, index: u3, base: u3) void {
    const no_base_marker: u3 = 0b101;
    assert(base != no_base_marker);

    return self.sib(scale, index, base);
}
/// Construct a SIB byte with scale * index + disp32
/// r/m effective address: [scale * index + disp32]
///
/// There is no base register in this form. `index` must not be 4, because an
/// index field of 4 means "no index" (that case is `sib_disp32`).
///
/// Remember to add a REX prefix byte if index is extended!
pub fn sib_scaleIndexDisp32(self: Self, scale: u2, index: u3) void {
    // index = 4 would mean "no index", which would make the scale meaningless.
    assert(index != 4);

    // base = 5 means no base, if mod == 0.
    const no_base: u3 = 5;
    self.sib(scale, index, no_base);
}
/// Emits a SIB byte that addresses through a base register only.
/// r/m effective address: [base]
///
/// `base` must not be 5, since with mod == 0 that value means "no base".
/// The caller must emit a REX prefix beforehand if base is extended.
pub fn sib_base(self: Self, base: u3) void {
    const no_base_marker: u3 = 0b101;
    assert(base != no_base_marker);

    // With index = 4 ("no index") the scale field has no effect, so 0 is used.
    const unused_scale: u2 = 0;
    const no_index: u3 = 0b100;
    self.sib(unused_scale, no_index, base);
}
/// Emits a SIB byte for an address made of a 32 bit displacement only.
/// r/m effective address: [disp32]
///
/// Neither an index nor a base register is encoded.
pub fn sib_disp32(self: Self) void {
    // With index = 4 ("no index") the scale field has no effect, so 0 is used.
    const unused_scale: u2 = 0;
    const no_index: u3 = 0b100;
    // base = 5 means "no base" when mod == 0.
    const no_base: u3 = 0b101;
    self.sib(unused_scale, no_index, no_base);
}
/// Emits a SIB byte for a scaled-index-plus-base address used with an 8 bit displacement.
/// r/m effective address: [base + scale * index + disp8]
///
/// The fields are passed to `sib` unchanged; no restriction is checked here.
/// The caller must emit a REX prefix beforehand if index or base are extended.
pub fn sib_scaleIndexBaseDisp8(self: Self, scale: u2, index: u3, base: u3) void {
    return self.sib(scale, index, base);
}
/// Emits a SIB byte for a base-only address used with an 8 bit displacement.
/// r/m effective address: [base + disp8]
///
/// The caller must emit a REX prefix beforehand if base is extended.
pub fn sib_baseDisp8(self: Self, base: u3) void {
    // With index = 4 ("no index") the scale field has no effect, so 0 is used.
    const unused_scale: u2 = 0;
    const no_index: u3 = 0b100;
    self.sib(unused_scale, no_index, base);
}
/// Emits a SIB byte for a scaled-index-plus-base address used with a 32 bit displacement.
/// r/m effective address: [base + scale * index + disp32]
///
/// The fields are passed to `sib` unchanged; no restriction is checked here.
/// The caller must emit a REX prefix beforehand if index or base are extended.
pub fn sib_scaleIndexBaseDisp32(self: Self, scale: u2, index: u3, base: u3) void {
    return self.sib(scale, index, base);
}
/// Emits a SIB byte for a base-only address used with a 32 bit displacement.
/// r/m effective address: [base + disp32]
///
/// The caller must emit a REX prefix beforehand if base is extended.
pub fn sib_baseDisp32(self: Self, base: u3) void {
    // With index = 4 ("no index") the scale field has no effect, so 0 is used.
    const unused_scale: u2 = 0;
    const no_index: u3 = 0b100;
    self.sib(unused_scale, no_index, base);
}
// -------------------------
// Trivial (no bit fiddling)
// -------------------------
/// Emits an 8 bit immediate as a single byte.
///
/// The signed value is stored as its raw two's-complement bit pattern;
/// any sign extension to the operand size is done by the cpu.
pub fn imm8(self: Self, imm: i8) void {
    const raw = @bitCast(u8, imm);
    self.code.appendAssumeCapacity(raw);
}
/// Emits an 8 bit displacement as a single byte.
///
/// The signed value is stored as its raw two's-complement bit pattern;
/// sign extension during address computation is done by the cpu.
pub fn disp8(self: Self, disp: i8) void {
    const raw = @bitCast(u8, disp);
    self.code.appendAssumeCapacity(raw);
}
/// Emits a 16 bit immediate through the encoder's `writeIntLittle` helper.
pub fn imm16(self: Self, imm: i16) void {
    self.writeIntLittle(@TypeOf(imm), imm);
}
/// Emits a 32 bit immediate through the encoder's `writeIntLittle` helper.
///
/// In 64 bit operation the cpu sign-extends the value to 64 bits.
pub fn imm32(self: Self, imm: i32) void {
    self.writeIntLittle(@TypeOf(imm), imm);
}
/// Emits a 32 bit displacement through the encoder's `writeIntLittle` helper.
///
/// The cpu sign-extends the value to 64 bits during address computation.
pub fn disp32(self: Self, disp: i32) void {
    self.writeIntLittle(@TypeOf(disp), disp);
}
/// Encode a 64 bit immediate
///
/// The value already has the full operand width, so no sign extension takes place.
/// It is written through the encoder's `writeIntLittle` helper.
pub fn imm64(self: Self, imm: u64) void {
    self.writeIntLittle(u64, imm);
}
};
fn expectEncoded(inst: Instruction, expected: []const u8) !void {
test "x86_64 Encoder helpers" {
var code = ArrayList(u8).init(testing.allocator);
defer code.deinit();
try inst.encodeInto(&code);
testing.expectEqualSlices(u8, expected, code.items);
}
test "x86_64 Instruction.encodeInto" {
// simple integer multiplication
// imul eax,edi
// 0faf c7
try expectEncoded(Instruction{
.primary_opcode_2b = 0xaf, // imul
.reg = .eax, // destination
.modrm = .{ .reg = .edi }, // source
}, &[_]u8{ 0x0f, 0xaf, 0xc7 });
{
try code.resize(0);
const encoder = try Encoder.init(&code, 4);
encoder.rex(.{
.r = Register.eax.isExtended(),
.b = Register.edi.isExtended(),
});
encoder.opcode_2byte(0x0f, 0xaf);
encoder.modRm_direct(
Register.eax.low_id(),
Register.edi.low_id(),
);
testing.expectEqualSlices(u8, &[_]u8{ 0x0f, 0xaf, 0xc7 }, code.items);
}
// simple mov
// mov eax,edi
// 89 f8
try expectEncoded(Instruction{
.primary_opcode_1b = 0x89, // mov (with rm as destination)
.reg = .edi, // source
.modrm = .{ .reg = .eax }, // destination
}, &[_]u8{ 0x89, 0xf8 });
{
try code.resize(0);
const encoder = try Encoder.init(&code, 3);
encoder.rex(.{
.r = Register.edi.isExtended(),
.b = Register.eax.isExtended(),
});
encoder.opcode_1byte(0x89);
encoder.modRm_direct(
Register.edi.low_id(),
Register.eax.low_id(),
);
testing.expectEqualSlices(u8, &[_]u8{ 0x89, 0xf8 }, code.items);
}
// signed integer addition of 32-bit sign extended immediate to 64 bit register
@ -542,19 +618,19 @@ test "x86_64 Instruction.encodeInto" {
// : 000 <-- opcode_extension = 0 because opcode extension is /0. /0 specifies ADD
// : 001 <-- 001 is rcx
// ffffff7f : 2147483647
try expectEncoded(Instruction{
// REX.W +
.operand_size_64 = true,
// 81
.primary_opcode_1b = 0x81,
// /0
.opcode_extension = 0,
// rcx
.modrm = .{ .reg = .rcx },
// immediate
.immediate_bytes = 4,
.immediate = 2147483647,
}, &[_]u8{ 0x48, 0x81, 0xc1, 0xff, 0xff, 0xff, 0x7f });
{
try code.resize(0);
const encoder = try Encoder.init(&code, 7);
encoder.rex(.{ .w = true }); // use 64 bit operation
encoder.opcode_1byte(0x81);
encoder.modRm_direct(
0,
Register.rcx.low_id(),
);
encoder.imm32(2147483647);
testing.expectEqualSlices(u8, &[_]u8{ 0x48, 0x81, 0xc1, 0xff, 0xff, 0xff, 0x7f }, code.items);
}
}
// TODO add these registers to the enum and populate dwarfLocOp