x86_64: introduce assemble() helper which encodes/decodes into MIR -> Instruction

This commit is contained in:
Jakub Konka 2023-03-08 23:45:05 +01:00
parent 5b37701028
commit 6e882d730b
4 changed files with 1750 additions and 2514 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,8 @@ const builtin = @import("builtin");
const assert = std.debug.assert;
const bits = @import("bits.zig");
const encoder = @import("encoder.zig");
const Air = @import("../../Air.zig");
const CodeGen = @import("CodeGen.zig");
const IntegerBitSet = std.bit_set.IntegerBitSet;
@ -21,421 +23,239 @@ instructions: std.MultiArrayList(Inst).Slice,
/// The meaning of this data is determined by `Inst.Tag` value.
extra: []const u32,
pub const Mnemonic = encoder.Instruction.Mnemonic;
pub const Operand = encoder.Instruction.Operand;
pub const Inst = struct {
tag: Tag,
ops: Ops,
/// The meaning of this depends on `tag` and `ops`.
data: Data,
pub const Tag = enum(u16) {
/// ops flags: form:
/// 0b00 reg1, reg2
/// 0b00 reg1, imm32
/// 0b01 reg1, [reg2 + imm32]
/// 0b01 reg1, [ds:imm32]
/// 0b10 [reg1 + imm32], reg2
/// 0b11 reg1, imm_s
/// Notes:
/// * If reg2 is `none` then it means Data field `imm` is used as the immediate.
/// * When two imm32 values are required, Data field `payload` points at `ImmPair`.
adc,
/// ops flags: form:
/// 0b00 byte ptr [reg1 + imm32], imm8
/// 0b01 word ptr [reg1 + imm32], imm16
/// 0b10 dword ptr [reg1 + imm32], imm32
/// 0b11 qword ptr [reg1 + imm32], imm32 (sign-extended to imm64)
/// Notes:
/// * Uses `ImmPair` as payload
adc_mem_imm,
/// form: reg1, [reg2 + scale*index + imm32]
/// ops flags scale
/// 0b00 1
/// 0b01 2
/// 0b10 4
/// 0b11 8
/// Notes:
/// * Uses `IndexRegisterDisp` as payload
adc_scale_src,
/// form: [reg1 + scale*index + imm32], reg2
/// ops flags scale
/// 0b00 1
/// 0b01 2
/// 0b10 4
/// 0b11 8
/// Notes:
/// * Uses `IndexRegisterDisp` payload.
adc_scale_dst,
/// form: [reg1 + scale*rax + imm32], imm32
/// ops flags scale
/// 0b00 1
/// 0b01 2
/// 0b10 4
/// 0b11 8
/// Notes:
/// * Uses `IndexRegisterDispImm` payload.
adc_scale_imm,
/// ops flags: form:
/// 0b00 byte ptr [reg1 + index + imm32], imm8
/// 0b01 word ptr [reg1 + index + imm32], imm16
/// 0b10 dword ptr [reg1 + index + imm32], imm32
/// 0b11 qword ptr [reg1 + index + imm32], imm32 (sign-extended to imm64)
/// Notes:
/// * Uses `IndexRegisterDispImm` payload.
adc_mem_index_imm,
// The following instructions all have the same encoding as `adc`.
add,
add_mem_imm,
add_scale_src,
add_scale_dst,
add_scale_imm,
add_mem_index_imm,
sub,
sub_mem_imm,
sub_scale_src,
sub_scale_dst,
sub_scale_imm,
sub_mem_index_imm,
xor,
xor_mem_imm,
xor_scale_src,
xor_scale_dst,
xor_scale_imm,
xor_mem_index_imm,
@"and",
and_mem_imm,
and_scale_src,
and_scale_dst,
and_scale_imm,
and_mem_index_imm,
@"or",
or_mem_imm,
or_scale_src,
or_scale_dst,
or_scale_imm,
or_mem_index_imm,
rol,
rol_mem_imm,
rol_scale_src,
rol_scale_dst,
rol_scale_imm,
rol_mem_index_imm,
ror,
ror_mem_imm,
ror_scale_src,
ror_scale_dst,
ror_scale_imm,
ror_mem_index_imm,
rcl,
rcl_mem_imm,
rcl_scale_src,
rcl_scale_dst,
rcl_scale_imm,
rcl_mem_index_imm,
rcr,
rcr_mem_imm,
rcr_scale_src,
rcr_scale_dst,
rcr_scale_imm,
rcr_mem_index_imm,
sbb,
sbb_mem_imm,
sbb_scale_src,
sbb_scale_dst,
sbb_scale_imm,
sbb_mem_index_imm,
cmp,
cmp_mem_imm,
cmp_scale_src,
cmp_scale_dst,
cmp_scale_imm,
cmp_mem_index_imm,
mov,
mov_mem_imm,
mov_scale_src,
mov_scale_dst,
mov_scale_imm,
mov_mem_index_imm,
/// ops flags: form:
/// 0b00 reg1, reg2,
/// 0b01 reg1, byte ptr [reg2 + imm32]
/// 0b10 reg1, word ptr [reg2 + imm32]
/// 0b11 reg1, dword ptr [reg2 + imm32]
mov_sign_extend,
/// ops flags: form:
/// 0b00 reg1, reg2
/// 0b01 reg1, byte ptr [reg2 + imm32]
/// 0b10 reg1, word ptr [reg2 + imm32]
mov_zero_extend,
/// ops flags: form:
/// 0b00 reg1, [reg2 + imm32]
/// 0b00 reg1, [ds:imm32]
/// 0b01 reg1, [rip + imm32]
/// 0b10 reg1, [reg2 + index + imm32]
/// Notes:
/// * 0b10 uses `IndexRegisterDisp` payload
lea,
/// ops flags: form:
/// 0b00 reg1, [rip + reloc] // via GOT PIC
/// 0b01 reg1, [rip + reloc] // direct load PIC
/// 0b10 reg1, [rip + reloc] // via imports table PIC
/// Notes:
/// * `Data` contains `relocation`
lea_pic,
/// ops flags: form:
/// 0b00 reg1, 1
/// 0b01 reg1, .cl
/// 0b10 reg1, imm8
/// Notes:
/// * If flags == 0b10, uses `imm`.
shl,
shl_mem_imm,
shl_scale_src,
shl_scale_dst,
shl_scale_imm,
shl_mem_index_imm,
sal,
sal_mem_imm,
sal_scale_src,
sal_scale_dst,
sal_scale_imm,
sal_mem_index_imm,
shr,
shr_mem_imm,
shr_scale_src,
shr_scale_dst,
shr_scale_imm,
shr_mem_index_imm,
sar,
sar_mem_imm,
sar_scale_src,
sar_scale_dst,
sar_scale_imm,
sar_mem_index_imm,
/// ops flags: form:
/// 0b00 reg1
/// 0b00 byte ptr [reg2 + imm32]
/// 0b01 word ptr [reg2 + imm32]
/// 0b10 dword ptr [reg2 + imm32]
/// 0b11 qword ptr [reg2 + imm32]
imul,
idiv,
mul,
div,
/// ops flags: form:
/// 0b00 AX <- AL
/// 0b01 DX:AX <- AX
/// 0b10 EDX:EAX <- EAX
/// 0b11 RDX:RAX <- RAX
cwd,
/// ops flags: form:
/// 0b00 reg1, reg2
/// 0b01 reg1, [reg2 + imm32]
/// 0b01 reg1, [imm32] if reg2 is none
/// 0b10 reg1, reg2, imm32
/// 0b11 reg1, [reg2 + imm32], imm32
imul_complex,
/// ops flags: form:
/// 0b00 reg1, imm64
/// 0b01 rax, moffs64
/// Notes:
/// * If reg1 is 64-bit, the immediate is 64-bit and stored
/// within extra data `Imm64`.
/// * For 0b01, reg1 (or reg2) need to be
/// a version of rax. If reg1 == .none, then reg2 == .rax,
/// or vice versa.
movabs,
/// ops flags: form:
/// 0b00 word ptr [reg1 + imm32]
/// 0b01 dword ptr [reg1 + imm32]
/// 0b10 qword ptr [reg1 + imm32]
/// Notes:
/// * source is always ST(0)
/// * only supports memory operands as destination
fisttp,
/// ops flags: form:
/// 0b01 dword ptr [reg1 + imm32]
/// 0b10 qword ptr [reg1 + imm32]
fld,
/// ops flags: form:
/// 0b00 inst
/// 0b01 reg1
/// 0b01 [imm32] if reg1 is none
/// 0b10 [reg1 + imm32]
jmp,
call,
/// ops flags:
/// unused
/// Notes:
/// * uses `inst_cc` in Data.
cond_jmp,
/// ops flags:
/// 0b00 reg1
/// Notes:
/// * uses condition code (CC) stored as part of data
cond_set_byte,
/// ops flags:
/// 0b00 reg1, reg2,
/// 0b01 reg1, word ptr [reg2 + imm]
/// 0b10 reg1, dword ptr [reg2 + imm]
/// 0b11 reg1, qword ptr [reg2 + imm]
/// Notes:
/// * uses condition code (CC) stored as part of data
cond_mov,
/// ops flags: form:
/// 0b00 reg1
/// 0b01 [reg1 + imm32]
/// 0b10 imm32
/// Notes:
/// * If 0b10 is specified and the tag is push, pushes immediate onto the stack
/// using the mnemonic PUSH imm32.
push,
pop,
/// ops flags: form:
/// 0b00 retf imm16
/// 0b01 retf
/// 0b10 retn imm16
/// 0b11 retn
ret,
/// Fast system call
syscall,
/// ops flags: form:
/// 0b00 reg1, imm32 if reg2 == .none
/// 0b00 reg1, reg2
/// TODO handle more cases
@"test",
/// Undefined Instruction
ud,
/// Breakpoint form:
/// 0b00 int3
interrupt,
/// Nop
nop,
/// SSE/AVX instructions
/// ops flags: form:
/// 0b00 reg1, qword ptr [reg2 + imm32]
/// 0b01 qword ptr [reg1 + imm32], reg2
/// 0b10 reg1, reg2
mov_f64,
mov_f32,
/// ops flags: form:
/// 0b00 reg1, reg2
add_f64,
add_f32,
/// ops flags: form:
/// 0b00 reg1, reg2
cmp_f64,
cmp_f32,
/// Pseudo-instructions
/// call extern function
/// Notes:
/// * target of the call is stored as `relocation` in `Data` union.
call_extern,
/// end of prologue
dbg_prologue_end,
/// start of epilogue
dbg_epilogue_begin,
/// update debug line
dbg_line,
/// push registers
/// Uses `payload` field with `SaveRegisterList` as payload.
push_regs,
/// pop registers
/// Uses `payload` field with `SaveRegisterList` as payload.
pop_regs,
};
/// The position of an MIR instruction within the `Mir` instructions array.
pub const Index = u32;
pub const Ops = packed struct {
reg1: u7,
reg2: u7,
flags: u2,
pub const Tag = enum(u8) {
/// Add with carry
adc,
/// Add
add,
/// Logical and
@"and",
/// Call
call,
/// Convert byte to word
cbw,
/// Convert word to doubleword
cwde,
/// Convert doubleword to quadword
cdqe,
/// Convert word to doubleword
cwd,
/// Convert doubleword to quadword
cdq,
/// Convert quadword to octword
cqo,
/// Compare two operands
cmp,
/// Conditional move
cmovcc,
/// Unsigned division
div,
/// Store integer with truncation
fisttp,
/// Load floating-point value
fld,
/// Signed division
idiv,
/// Signed multiplication
imul,
/// Breakpoint trap
int3,
/// Conditional jump
jcc,
/// Jump
jmp,
/// Load effective address
lea,
/// Move
mov,
/// Move with sign extension
movsx,
/// Move with zero extension
movzx,
/// Multiply
mul,
/// No-op
nop,
/// Logical or
@"or",
/// Pop
pop,
/// Push
push,
/// Return
ret,
/// Arithmetic shift left
sal,
/// Arithmetic shift right
sar,
/// Integer subtraction with borrow
sbb,
/// Set byte on condition
setcc,
/// Logical shift left
shl,
/// Logical shift right
shr,
/// Subtract
sub,
/// Syscall
syscall,
/// Logical compare
@"test",
/// Undefined instruction
ud2,
/// Logical exclusive-or
xor,
/// Packs two registers and a 2-bit flags value into an `Ops`.
/// Any omitted register defaults to `.none`; omitted flags default to `0b00`.
pub fn encode(vals: struct {
    reg1: Register = .none,
    reg2: Register = .none,
    flags: u2 = 0b00,
}) Ops {
    // Registers are stored as their raw enum integer values.
    const raw_reg1 = @enumToInt(vals.reg1);
    const raw_reg2 = @enumToInt(vals.reg2);
    return .{ .reg1 = raw_reg1, .reg2 = raw_reg2, .flags = vals.flags };
}
/// Add single precision floating point
addss,
/// Compare scalar single-precision floating-point values
cmpss,
/// Move scalar single-precision floating-point value
movss,
/// Unordered compare scalar single-precision floating-point values
ucomiss,
/// Add double precision floating point
addsd,
/// Compare scalar double-precision floating-point values
cmpsd,
/// Move scalar double-precision floating-point value
movsd,
/// Unordered compare scalar double-precision floating-point values
ucomisd,
/// Unpacks an `Ops` back into typed registers plus the 2-bit flags value.
pub fn decode(ops: Ops) struct {
    reg1: Register,
    reg2: Register,
    flags: u2,
} {
    // The stored integers are converted back into `Register` values.
    const first = @intToEnum(Register, ops.reg1);
    const second = @intToEnum(Register, ops.reg2);
    return .{ .reg1 = first, .reg2 = second, .flags = ops.flags };
}
/// End of prologue
dbg_prologue_end,
/// Start of epilogue
dbg_epilogue_begin,
/// Update debug line
/// Uses `payload` payload with data of type `DbgLineColumn`.
dbg_line,
/// Push registers
/// Uses `payload` payload with data of type `SaveRegisterList`.
push_regs,
/// Pop registers
/// Uses `payload` payload with data of type `SaveRegisterList`.
pop_regs,
};
/// Operand form of an instruction.
/// Each member names the payload that carries the instruction's operands.
pub const Ops = enum(u8) {
/// No data associated with this instruction (only mnemonic is used).
none,
/// Single register operand.
/// Uses `r` payload.
r,
/// Register, register operands.
/// Uses `rr` payload.
rr,
/// Register, register, register operands.
/// Uses `rrr` payload.
rrr,
/// Register, immediate (sign-extended) operands.
/// Uses `ri_s` payload.
ri_s,
/// Register, immediate (unsigned) operands.
/// Uses `ri_u` payload.
ri_u,
/// Register, 64-bit unsigned immediate operands.
/// Uses `rx` payload with payload type `Imm64`.
ri64,
/// Immediate (sign-extended) operand.
/// Uses `imm_s` payload.
imm_s,
/// Immediate (unsigned) operand.
/// Uses `imm_u` payload.
imm_u,
/// Relative displacement operand.
/// Uses `rel` payload.
rel,
/// Register, memory operands.
/// Uses `rx` payload.
rm,
/// Register, memory, immediate (unsigned) operands.
/// Uses `rx` payload.
rmi_u,
/// Register, memory, immediate (sign-extended) operands.
/// Uses `rx` payload.
rmi_s,
/// Memory, immediate (unsigned) operands.
/// Uses `payload` payload.
mi_u,
/// Memory, immediate (sign-extended) operands.
/// Uses `payload` payload.
mi_s,
/// Memory, register operands.
/// Uses `payload` payload.
mr,
/// Lea into register with linker relocation.
/// Uses `payload` payload with data of type `LeaRegisterReloc`.
lea_r_reloc,
/// References another Mir instruction directly.
/// Uses `inst` payload.
inst,
/// References another Mir instruction directly with condition code (CC).
/// Uses `inst_cc` payload.
inst_cc,
/// NOTE(review): presumably a memory operand qualified by a condition code (CC),
/// going by the payload type name — confirm against the lowering code.
/// Uses `payload` payload with data of type `MemoryConditionCode`.
m_cc,
/// NOTE(review): presumably register, memory operands qualified by a condition
/// code (CC), going by the payload type name — confirm against the lowering code.
/// Uses `rx` payload with extra data of type `MemoryConditionCode`.
rm_cc,
/// NOTE(review): the operand form is not described here; presumably a linker
/// relocation target — confirm against users of the `reloc` payload.
/// Uses `reloc` payload.
reloc,
};
/// All instructions have a 4-byte payload, which is contained within
/// this union. `Tag` determines which union field is active, as well as
/// how to interpret the data within.
pub const Data = union {
/// Another instruction.
/// References another Mir instruction.
inst: Index,
/// A 32-bit immediate value.
imm: u32,
/// A 32-bit signed immediate value.
imm_s: i32,
/// A 32-bit signed displacement value.
disp: i32,
/// A condition code for use with EFLAGS register.
cc: bits.Condition,
/// Another instruction with condition code.
/// Used by `cond_jmp`.
/// Another instruction with condition code (CC).
/// Used by `jcc`.
inst_cc: struct {
/// Another instruction.
inst: Index,
/// A condition code for use with EFLAGS register.
cc: bits.Condition,
},
/// A 32-bit signed immediate value.
imm_s: i32,
/// A 32-bit unsigned immediate value.
imm_u: u32,
/// A 32-bit signed relative offset value.
rel: i32,
r: Register,
rr: struct {
r1: Register,
r2: Register,
},
rrr: struct {
r1: Register,
r2: Register,
r3: Register,
},
/// Register, signed immediate.
ri_s: struct {
r1: Register,
imm: i32,
},
/// Register, unsigned immediate.
ri_u: struct {
r1: Register,
imm: u32,
},
/// Register, followed by custom payload found in extra.
rx: struct {
r1: Register,
payload: u32,
},
/// Relocation for the linker where:
/// * `atom_index` is the index of the source
/// * `sym_index` is the index of the target
@ -458,62 +278,19 @@ pub const Inst = struct {
}
};
/// Payload pairing a SIB index register with a displacement.
pub const IndexRegisterDisp = struct {
    /// Index register to use with SIB-based encoding, stored as its raw integer value.
    index: u32,
    /// Displacement value
    disp: i32,

    /// Builds the payload from a typed index register and a displacement.
    pub fn encode(index: Register, disp: i32) IndexRegisterDisp {
        const raw_index = @enumToInt(index);
        return .{ .index = raw_index, .disp = disp };
    }

    /// Recovers the typed index register and the displacement from the payload.
    pub fn decode(this: IndexRegisterDisp) struct {
        index: Register,
        disp: i32,
    } {
        const index_reg = @intToEnum(Register, this.index);
        return .{ .index = index_reg, .disp = this.disp };
    }
};
/// TODO: would it be worth making `IndexRegisterDisp` and `IndexRegisterDispImm` a variable length list
/// instead of having two structs, one a superset of the other one?
pub const IndexRegisterDispImm = struct {
/// Index register to use with SIB-based encoding
index: u32,
/// Displacement value
disp: i32,
/// Immediate
imm: u32,
pub fn encode(index: Register, disp: i32, imm: u32) IndexRegisterDispImm {
return .{
.index = @enumToInt(index),
.disp = disp,
.imm = imm,
};
}
pub fn decode(this: IndexRegisterDispImm) struct {
index: Register,
disp: i32,
imm: u32,
} {
return .{
.index = @intToEnum(Register, this.index),
.disp = this.disp,
.imm = this.imm,
};
}
/// Payload describing a `lea` into a register whose address is resolved by the
/// linker (used by the `lea_r_reloc` operand form).
pub const LeaRegisterReloc = struct {
/// Destination register.
reg: Register,
/// Type of the load.
/// NOTE(review): the members presumably mean via GOT, direct load, and via the
/// imports table respectively — confirm against the emit code.
load_type: enum(u2) {
got,
direct,
import,
},
/// Index of the containing atom.
atom_index: u32,
/// Index into the linker's symbol table.
sym_index: u32,
};
/// Used in conjunction with `SaveRegisterList` payload to transfer a list of used registers
@ -557,16 +334,13 @@ pub const RegisterList = struct {
};
/// Payload for the `push_regs` and `pop_regs` pseudo-instructions.
pub const SaveRegisterList = struct {
/// Base register
base_reg: u32,
/// Use `RegisterList` to populate.
register_list: u32,
/// NOTE(review): meaning is not visible here; presumably a stack offset relative
/// to `base_reg` for the register save area — confirm against the emit code.
stack_end: u32,
};
/// Payload used when an instruction needs two 32-bit values.
pub const ImmPair = struct {
/// NOTE(review): presumably the signed displacement of the destination memory
/// operand — confirm against the users of this payload.
dest_off: i32,
/// NOTE(review): presumably the immediate operand — confirm against the users
/// of this payload.
operand: u32,
};
pub const Imm64 = struct {
msb: u32,
lsb: u32,

View File

@ -4,10 +4,6 @@ const math = std.math;
const bits = @import("bits.zig");
const Encoding = @import("Encoding.zig");
const Immediate = bits.Immediate;
const Memory = bits.Memory;
const Moffs = bits.Moffs;
const PtrSize = bits.PtrSize;
const Register = bits.Register;
pub const Instruction = struct {
@ -25,6 +21,9 @@ pub const Instruction = struct {
mem: Memory,
imm: Immediate,
pub const Memory = bits.Memory;
pub const Immediate = bits.Immediate;
/// Returns the bitsize of the operand.
pub fn bitSize(op: Operand) u64 {
return switch (op) {
@ -296,7 +295,7 @@ pub const Instruction = struct {
try encoder.opcode_1byte(prefix);
}
fn encodeMemory(encoding: Encoding, mem: Memory, operand: Operand, encoder: anytype) !void {
fn encodeMemory(encoding: Encoding, mem: Operand.Memory, operand: Operand, encoder: anytype) !void {
const operand_enc = switch (operand) {
.reg => |reg| reg.lowEnc(),
.none => encoding.modRmExt(),
@ -379,7 +378,7 @@ pub const Instruction = struct {
}
}
fn encodeImm(imm: Immediate, kind: Encoding.Op, encoder: anytype) !void {
fn encodeImm(imm: Operand.Immediate, kind: Encoding.Op, encoder: anytype) !void {
const raw = imm.asUnsigned(kind.bitSize());
switch (kind.bitSize()) {
8 => try encoder.imm8(@intCast(u8, raw)),