x86_64: add simple disassembler interface to the encoder

This commit is contained in:
Jakub Konka 2023-09-13 17:54:39 +02:00
parent 9de0df76a8
commit 7a16a97671

View File

@ -0,0 +1,474 @@
const Disassembler = @This();
const std = @import("std");
const assert = std.debug.assert;
const math = std.math;
const bits = @import("bits.zig");
const encoder = @import("encoder.zig");
const Encoding = @import("Encoding.zig");
const Immediate = bits.Immediate;
const Instruction = encoder.Instruction;
const LegacyPrefixes = encoder.LegacyPrefixes;
const Memory = bits.Memory;
const Register = bits.Register;
const Rex = encoder.Rex;
pub const Error = error{
EndOfStream,
LegacyPrefixAfterRex,
UnknownOpcode,
Todo,
};
code: []const u8,
pos: usize = 0,
pub fn init(code: []const u8) Disassembler {
return .{ .code = code };
}
pub fn next(dis: *Disassembler) Error!?Instruction {
const prefixes = dis.parsePrefixes() catch |err| switch (err) {
error.EndOfStream => return null,
else => |e| return e,
};
const enc = try dis.parseEncoding(prefixes) orelse return error.UnknownOpcode;
switch (enc.data.op_en) {
.np => return inst(enc, .{}),
.d, .i => {
const imm = try dis.parseImm(enc.data.ops[0]);
return inst(enc, .{
.op1 = .{ .imm = imm },
});
},
.zi => {
const imm = try dis.parseImm(enc.data.ops[1]);
return inst(enc, .{
.op1 = .{ .reg = Register.rax.toBitSize(enc.data.ops[0].regBitSize()) },
.op2 = .{ .imm = imm },
});
},
.o, .oi => {
const reg_low_enc = @as(u3, @truncate(dis.code[dis.pos - 1]));
const op2: Instruction.Operand = if (enc.data.op_en == .oi) .{
.imm = try dis.parseImm(enc.data.ops[1]),
} else .none;
return inst(enc, .{
.op1 = .{ .reg = parseGpRegister(reg_low_enc, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
.op2 = op2,
});
},
.m, .mi, .m1, .mc => {
const modrm = try dis.parseModRmByte();
const act_enc = Encoding.findByOpcode(enc.opcode(), .{
.legacy = prefixes.legacy,
.rex = prefixes.rex,
}, modrm.op1) orelse return error.UnknownOpcode;
const sib = if (modrm.sib()) try dis.parseSibByte() else null;
if (modrm.direct()) {
const op2: Instruction.Operand = switch (act_enc.data.op_en) {
.mi => .{ .imm = try dis.parseImm(act_enc.data.ops[1]) },
.m1 => .{ .imm = Immediate.u(1) },
.mc => .{ .reg = .cl },
.m => .none,
else => unreachable,
};
return inst(act_enc, .{
.op1 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, act_enc.data.ops[0].regBitSize()) },
.op2 = op2,
});
}
const disp = try dis.parseDisplacement(modrm, sib);
const op2: Instruction.Operand = switch (act_enc.data.op_en) {
.mi => .{ .imm = try dis.parseImm(act_enc.data.ops[1]) },
.m1 => .{ .imm = Immediate.u(1) },
.mc => .{ .reg = .cl },
.m => .none,
else => unreachable,
};
if (modrm.rip()) {
return inst(act_enc, .{
.op1 = .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(act_enc.data.ops[0].memBitSize()), disp) },
.op2 = op2,
});
}
const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
const base = if (sib) |info|
info.baseReg(modrm, prefixes)
else
parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
return inst(act_enc, .{
.op1 = .{ .mem = Memory.sib(Memory.PtrSize.fromBitSize(act_enc.data.ops[0].memBitSize()), .{
.base = if (base) |base_reg| .{ .reg = base_reg } else .none,
.scale_index = scale_index,
.disp = disp,
}) },
.op2 = op2,
});
},
.fd => {
const seg = segmentRegister(prefixes.legacy);
const offset = try dis.parseOffset();
return inst(enc, .{
.op1 = .{ .reg = Register.rax.toBitSize(enc.data.ops[0].regBitSize()) },
.op2 = .{ .mem = Memory.moffs(seg, offset) },
});
},
.td => {
const seg = segmentRegister(prefixes.legacy);
const offset = try dis.parseOffset();
return inst(enc, .{
.op1 = .{ .mem = Memory.moffs(seg, offset) },
.op2 = .{ .reg = Register.rax.toBitSize(enc.data.ops[1].regBitSize()) },
});
},
.mr, .mri, .mrc => {
const modrm = try dis.parseModRmByte();
const sib = if (modrm.sib()) try dis.parseSibByte() else null;
const src_bit_size = enc.data.ops[1].regBitSize();
if (modrm.direct()) {
return inst(enc, .{
.op1 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, enc.data.ops[0].regBitSize()) },
.op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.x, prefixes.rex, src_bit_size) },
});
}
const dst_bit_size = enc.data.ops[0].memBitSize();
const disp = try dis.parseDisplacement(modrm, sib);
const op3: Instruction.Operand = switch (enc.data.op_en) {
.mri => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
.mrc => .{ .reg = .cl },
.mr => .none,
else => unreachable,
};
if (modrm.rip()) {
return inst(enc, .{
.op1 = .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(dst_bit_size), disp) },
.op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, src_bit_size) },
.op3 = op3,
});
}
const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
const base = if (sib) |info|
info.baseReg(modrm, prefixes)
else
parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
return inst(enc, .{
.op1 = .{ .mem = Memory.sib(Memory.PtrSize.fromBitSize(dst_bit_size), .{
.base = if (base) |base_reg| .{ .reg = base_reg } else .none,
.scale_index = scale_index,
.disp = disp,
}) },
.op2 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, src_bit_size) },
.op3 = op3,
});
},
.rm, .rmi => {
const modrm = try dis.parseModRmByte();
const sib = if (modrm.sib()) try dis.parseSibByte() else null;
const dst_bit_size = enc.data.ops[0].regBitSize();
if (modrm.direct()) {
const op3: Instruction.Operand = switch (enc.data.op_en) {
.rm => .none,
.rmi => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
else => unreachable,
};
return inst(enc, .{
.op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.x, prefixes.rex, dst_bit_size) },
.op2 = .{ .reg = parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, enc.data.ops[1].regBitSize()) },
.op3 = op3,
});
}
const src_bit_size = if (enc.data.ops[1] == .m) dst_bit_size else enc.data.ops[1].memBitSize();
const disp = try dis.parseDisplacement(modrm, sib);
const op3: Instruction.Operand = switch (enc.data.op_en) {
.rmi => .{ .imm = try dis.parseImm(enc.data.ops[2]) },
.rm => .none,
else => unreachable,
};
if (modrm.rip()) {
return inst(enc, .{
.op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, dst_bit_size) },
.op2 = .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(src_bit_size), disp) },
.op3 = op3,
});
}
const scale_index = if (sib) |info| info.scaleIndex(prefixes.rex) else null;
const base = if (sib) |info|
info.baseReg(modrm, prefixes)
else
parseGpRegister(modrm.op2, prefixes.rex.b, prefixes.rex, 64);
return inst(enc, .{
.op1 = .{ .reg = parseGpRegister(modrm.op1, prefixes.rex.r, prefixes.rex, dst_bit_size) },
.op2 = .{ .mem = Memory.sib(Memory.PtrSize.fromBitSize(src_bit_size), .{
.base = if (base) |base_reg| .{ .reg = base_reg } else .none,
.scale_index = scale_index,
.disp = disp,
}) },
.op3 = op3,
});
},
.rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, // TODO
}
}
fn inst(encoding: Encoding, args: struct {
prefix: Instruction.Prefix = .none,
op1: Instruction.Operand = .none,
op2: Instruction.Operand = .none,
op3: Instruction.Operand = .none,
op4: Instruction.Operand = .none,
}) Instruction {
var i = Instruction{ .encoding = encoding, .prefix = args.prefix, .ops = .{
args.op1,
args.op2,
args.op3,
args.op4,
} };
return i;
}
const Prefixes = struct {
legacy: LegacyPrefixes = .{},
rex: Rex = .{},
// TODO add support for VEX prefix
};
fn parsePrefixes(dis: *Disassembler) !Prefixes {
const rex_prefix_mask: u4 = 0b0100;
var stream = std.io.fixedBufferStream(dis.code[dis.pos..]);
const reader = stream.reader();
var res: Prefixes = .{};
while (true) {
const next_byte = try reader.readByte();
dis.pos += 1;
switch (next_byte) {
0xf0, 0xf2, 0xf3, 0x2e, 0x36, 0x26, 0x64, 0x65, 0x3e, 0x66, 0x67 => {
// Legacy prefix
if (res.rex.present) return error.LegacyPrefixAfterRex;
switch (next_byte) {
0xf0 => res.legacy.prefix_f0 = true,
0xf2 => res.legacy.prefix_f2 = true,
0xf3 => res.legacy.prefix_f3 = true,
0x2e => res.legacy.prefix_2e = true,
0x36 => res.legacy.prefix_36 = true,
0x26 => res.legacy.prefix_26 = true,
0x64 => res.legacy.prefix_64 = true,
0x65 => res.legacy.prefix_65 = true,
0x3e => res.legacy.prefix_3e = true,
0x66 => res.legacy.prefix_66 = true,
0x67 => res.legacy.prefix_67 = true,
else => unreachable,
}
},
else => {
if (rex_prefix_mask == @as(u4, @truncate(next_byte >> 4))) {
// REX prefix
res.rex.w = next_byte & 0b1000 != 0;
res.rex.r = next_byte & 0b100 != 0;
res.rex.x = next_byte & 0b10 != 0;
res.rex.b = next_byte & 0b1 != 0;
res.rex.present = true;
continue;
}
// TODO VEX prefix
dis.pos -= 1;
break;
},
}
}
return res;
}
fn parseEncoding(dis: *Disassembler, prefixes: Prefixes) !?Encoding {
const o_mask: u8 = 0b1111_1000;
var opcode: [3]u8 = .{ 0, 0, 0 };
var stream = std.io.fixedBufferStream(dis.code[dis.pos..]);
const reader = stream.reader();
comptime var opc_count = 0;
inline while (opc_count < 3) : (opc_count += 1) {
const byte = try reader.readByte();
opcode[opc_count] = byte;
dis.pos += 1;
if (byte == 0x0f) {
// Multi-byte opcode
} else if (opc_count > 0) {
// Multi-byte opcode
if (Encoding.findByOpcode(opcode[0 .. opc_count + 1], .{
.legacy = prefixes.legacy,
.rex = prefixes.rex,
}, null)) |mnemonic| {
return mnemonic;
}
} else {
// Single-byte opcode
if (Encoding.findByOpcode(opcode[0..1], .{
.legacy = prefixes.legacy,
.rex = prefixes.rex,
}, null)) |mnemonic| {
return mnemonic;
} else {
// Try O* encoding
return Encoding.findByOpcode(&.{opcode[0] & o_mask}, .{
.legacy = prefixes.legacy,
.rex = prefixes.rex,
}, null);
}
}
}
return null;
}
fn parseGpRegister(low_enc: u3, is_extended: bool, rex: Rex, bit_size: u64) Register {
const reg_id: u4 = @as(u4, @intCast(@intFromBool(is_extended))) << 3 | low_enc;
const reg = @as(Register, @enumFromInt(reg_id)).toBitSize(bit_size);
return switch (reg) {
.spl => if (rex.present or rex.isSet()) .spl else .ah,
.dil => if (rex.present or rex.isSet()) .dil else .bh,
.bpl => if (rex.present or rex.isSet()) .bpl else .ch,
.sil => if (rex.present or rex.isSet()) .sil else .dh,
else => reg,
};
}
fn parseImm(dis: *Disassembler, kind: Encoding.Op) !Immediate {
var stream = std.io.fixedBufferStream(dis.code[dis.pos..]);
var creader = std.io.countingReader(stream.reader());
const reader = creader.reader();
const imm = switch (kind) {
.imm8s, .rel8 => Immediate.s(try reader.readInt(i8, .Little)),
.imm16s, .rel16 => Immediate.s(try reader.readInt(i16, .Little)),
.imm32s, .rel32 => Immediate.s(try reader.readInt(i32, .Little)),
.imm8 => Immediate.u(try reader.readInt(u8, .Little)),
.imm16 => Immediate.u(try reader.readInt(u16, .Little)),
.imm32 => Immediate.u(try reader.readInt(u32, .Little)),
.imm64 => Immediate.u(try reader.readInt(u64, .Little)),
else => unreachable,
};
dis.pos += creader.bytes_read;
return imm;
}
fn parseOffset(dis: *Disassembler) !u64 {
var stream = std.io.fixedBufferStream(dis.code[dis.pos..]);
const reader = stream.reader();
const offset = try reader.readInt(u64, .Little);
dis.pos += 8;
return offset;
}
const ModRm = packed struct {
mod: u2,
op1: u3,
op2: u3,
inline fn direct(self: ModRm) bool {
return self.mod == 0b11;
}
inline fn rip(self: ModRm) bool {
return self.mod == 0 and self.op2 == 0b101;
}
inline fn sib(self: ModRm) bool {
return !self.direct() and self.op2 == 0b100;
}
};
fn parseModRmByte(dis: *Disassembler) !ModRm {
if (dis.code[dis.pos..].len == 0) return error.EndOfStream;
const modrm_byte = dis.code[dis.pos];
dis.pos += 1;
const mod: u2 = @as(u2, @truncate(modrm_byte >> 6));
const op1: u3 = @as(u3, @truncate(modrm_byte >> 3));
const op2: u3 = @as(u3, @truncate(modrm_byte));
return ModRm{ .mod = mod, .op1 = op1, .op2 = op2 };
}
fn segmentRegister(prefixes: LegacyPrefixes) Register {
if (prefixes.prefix_2e) return .cs;
if (prefixes.prefix_36) return .ss;
if (prefixes.prefix_26) return .es;
if (prefixes.prefix_64) return .fs;
if (prefixes.prefix_65) return .gs;
return .ds;
}
const Sib = packed struct {
scale: u2,
index: u3,
base: u3,
fn scaleIndex(self: Sib, rex: Rex) ?Memory.ScaleIndex {
if (self.index == 0b100 and !rex.x) return null;
return .{
.scale = @as(u4, 1) << self.scale,
.index = parseGpRegister(self.index, rex.x, rex, 64),
};
}
fn baseReg(self: Sib, modrm: ModRm, prefixes: Prefixes) ?Register {
if (self.base == 0b101 and modrm.mod == 0) {
if (self.scaleIndex(prefixes.rex)) |_| return null;
return segmentRegister(prefixes.legacy);
}
return parseGpRegister(self.base, prefixes.rex.b, prefixes.rex, 64);
}
};
fn parseSibByte(dis: *Disassembler) !Sib {
if (dis.code[dis.pos..].len == 0) return error.EndOfStream;
const sib_byte = dis.code[dis.pos];
dis.pos += 1;
const scale: u2 = @as(u2, @truncate(sib_byte >> 6));
const index: u3 = @as(u3, @truncate(sib_byte >> 3));
const base: u3 = @as(u3, @truncate(sib_byte));
return Sib{ .scale = scale, .index = index, .base = base };
}
fn parseDisplacement(dis: *Disassembler, modrm: ModRm, sib: ?Sib) !i32 {
var stream = std.io.fixedBufferStream(dis.code[dis.pos..]);
var creader = std.io.countingReader(stream.reader());
const reader = creader.reader();
const disp = disp: {
if (sib) |info| {
if (info.base == 0b101 and modrm.mod == 0) {
break :disp try reader.readInt(i32, .Little);
}
}
if (modrm.rip()) {
break :disp try reader.readInt(i32, .Little);
}
break :disp switch (modrm.mod) {
0b00 => 0,
0b01 => try reader.readInt(i8, .Little),
0b10 => try reader.readInt(i32, .Little),
0b11 => unreachable,
};
};
dis.pos += creader.bytes_read;
return disp;
}