x86_64: reimplement inline memcpy and memset

This commit is contained in:
Jacob Young 2023-03-18 05:49:20 -04:00 committed by Jakub Konka
parent c865c8fb2a
commit edd63f9aba
7 changed files with 270 additions and 215 deletions

View File

@ -1286,7 +1286,7 @@ pub fn spillEflagsIfOccupied(self: *Self) !void {
}
}
pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [count]Register) !void {
pub fn spillRegisters(self: *Self, registers: []const Register) !void {
for (registers) |reg| {
try self.register_manager.getReg(reg, null);
}
@ -1540,7 +1540,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void {
break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs);
}
try self.spillRegisters(2, .{ .rax, .rdx });
try self.spillRegisters(&.{ .rax, .rdx });
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
@ -1594,7 +1594,7 @@ fn airAddSubShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
try self.spillEflagsIfOccupied();
if (tag == .shl_with_overflow) {
try self.spillRegisters(1, .{.rcx});
try self.spillRegisters(&.{.rcx});
}
const partial: MCValue = switch (tag) {
@ -1721,7 +1721,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
try self.spillEflagsIfOccupied();
self.eflags_inst = inst;
try self.spillRegisters(2, .{ .rax, .rdx });
try self.spillRegisters(&.{ .rax, .rdx });
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
@ -1774,7 +1774,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
break :dst_reg dst_reg;
},
.unsigned => {
try self.spillRegisters(2, .{ .rax, .rdx });
try self.spillRegisters(&.{ .rax, .rdx });
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
@ -1888,7 +1888,7 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
}
try self.spillRegisters(1, .{.rcx});
try self.spillRegisters(&.{.rcx});
const tag = self.air.instructions.items(.tag)[inst];
const lhs = try self.resolveInst(bin_op.lhs);
@ -2832,6 +2832,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
.unreach => unreachable,
.eflags => unreachable,
.undef => {
if (!self.wantSafety()) return; // The already existing value will do just fine.
switch (abi_size) {
1 => try self.store(ptr, .{ .immediate = 0xaa }, ptr_ty, value_ty),
2 => try self.store(ptr, .{ .immediate = 0xaaaa }, ptr_ty, value_ty),
@ -4035,11 +4036,40 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
defer info.deinit(self);
try self.spillEflagsIfOccupied();
try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*));
for (abi.getCallerPreservedRegs(self.target.*)) |reg| {
try self.register_manager.getReg(reg, null);
// set stack arguments first because this can clobber registers
// also clobber spill arguments as we go
if (info.return_value == .stack_offset) {
try self.spillRegisters(&.{abi.getCAbiIntParamRegs(self.target.*)[0]});
}
for (args, info.args) |arg, mc_arg| {
const arg_ty = self.air.typeOf(arg);
const arg_mcv = try self.resolveInst(arg);
// Here we do not use setRegOrMem even though the logic is similar, because
// the function call will move the stack pointer, so the offsets are different.
switch (mc_arg) {
.none => {},
.register => |reg| try self.spillRegisters(&.{reg}),
.stack_offset => |off| {
// TODO rewrite using `genSetStack`
try self.genSetStackArg(arg_ty, off, arg_mcv);
},
.ptr_stack_offset => {
return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{});
},
.undef => unreachable,
.immediate => unreachable,
.unreach => unreachable,
.dead => unreachable,
.memory => unreachable,
.linker_load => unreachable,
.eflags => unreachable,
.register_overflow => unreachable,
}
}
// now we are free to set register arguments
const ret_reg_lock: ?RegisterLock = blk: {
if (info.return_value == .stack_offset) {
const ret_ty = fn_ty.fnReturnType();
@ -4049,7 +4079,6 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
log.debug("airCall: return value on stack at offset {}", .{stack_offset});
const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0];
try self.register_manager.getReg(ret_reg, null);
try self.genSetReg(Type.usize, ret_reg, .{ .ptr_stack_offset = stack_offset });
const ret_reg_lock = self.register_manager.lockRegAssumeUnused(ret_reg);
@ -4061,25 +4090,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
};
defer if (ret_reg_lock) |lock| self.register_manager.unlockReg(lock);
for (args, info.args) |arg, info_arg| {
const mc_arg = info_arg;
for (args, info.args) |arg, mc_arg| {
const arg_ty = self.air.typeOf(arg);
const arg_mcv = try self.resolveInst(arg);
// Here we do not use setRegOrMem even though the logic is similar, because
// the function call will move the stack pointer, so the offsets are different.
switch (mc_arg) {
.none => continue,
.register => |reg| {
try self.register_manager.getReg(reg, null);
try self.genSetReg(arg_ty, reg, arg_mcv);
},
.stack_offset => |off| {
// TODO rewrite using `genSetStack`
try self.genSetStackArg(arg_ty, off, arg_mcv);
},
.ptr_stack_offset => {
return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{});
},
.none, .stack_offset, .ptr_stack_offset => {},
.register => |reg| try self.genSetReg(arg_ty, reg, arg_mcv),
.undef => unreachable,
.immediate => unreachable,
.unreach => unreachable,
@ -5277,6 +5293,7 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
.dead => unreachable,
.unreach, .none => return,
.undef => {
if (!self.wantSafety()) return; // The already existing value will do just fine.
if (abi_size <= 8) {
const reg = try self.copyToTmpRegister(ty, mcv);
return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
@ -5384,8 +5401,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
.dead => unreachable,
.unreach, .none => return, // Nothing to do.
.undef => {
if (!self.wantSafety())
return; // The already existing value will do just fine.
if (!self.wantSafety()) return; // The already existing value will do just fine.
// TODO Upgrade this to a memset call when we have that available.
switch (abi_size) {
1, 2, 4 => {
@ -5607,19 +5623,14 @@ fn genInlineMemcpy(
null;
defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock);
const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp);
const dst_addr_reg = regs[0];
const src_addr_reg = regs[1];
const index_reg = regs[2].to64();
const count_reg = regs[3].to64();
const tmp_reg = regs[4].to8();
try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
switch (dst_ptr) {
.memory, .linker_load => {
try self.loadMemPtrIntoRegister(dst_addr_reg, Type.usize, dst_ptr);
try self.loadMemPtrIntoRegister(.rdi, Type.usize, dst_ptr);
},
.ptr_stack_offset, .stack_offset => |off| {
try self.asmRegisterMemory(.lea, dst_addr_reg.to64(), Memory.sib(.qword, .{
try self.asmRegisterMemory(.lea, .rdi, Memory.sib(.qword, .{
.base = opts.dest_stack_base orelse .rbp,
.disp = -off,
}));
@ -5627,7 +5638,7 @@ fn genInlineMemcpy(
.register => |reg| {
try self.asmRegisterRegister(
.mov,
registerAlias(dst_addr_reg, @intCast(u32, @divExact(reg.bitSize(), 8))),
registerAlias(.rdi, @intCast(u32, @divExact(reg.bitSize(), 8))),
reg,
);
},
@ -5638,10 +5649,10 @@ fn genInlineMemcpy(
switch (src_ptr) {
.memory, .linker_load => {
try self.loadMemPtrIntoRegister(src_addr_reg, Type.usize, src_ptr);
try self.loadMemPtrIntoRegister(.rsi, Type.usize, src_ptr);
},
.ptr_stack_offset, .stack_offset => |off| {
try self.asmRegisterMemory(.lea, src_addr_reg.to64(), Memory.sib(.qword, .{
try self.asmRegisterMemory(.lea, .rsi, Memory.sib(.qword, .{
.base = opts.source_stack_base orelse .rbp,
.disp = -off,
}));
@ -5649,7 +5660,7 @@ fn genInlineMemcpy(
.register => |reg| {
try self.asmRegisterRegister(
.mov,
registerAlias(src_addr_reg, @intCast(u32, @divExact(reg.bitSize(), 8))),
registerAlias(.rsi, @intCast(u32, @divExact(reg.bitSize(), 8))),
reg,
);
},
@ -5658,37 +5669,12 @@ fn genInlineMemcpy(
},
}
try self.genSetReg(Type.usize, count_reg, len);
try self.asmRegisterImmediate(.mov, index_reg, Immediate.u(0));
const loop_start = try self.addInst(.{
.tag = .cmp,
.ops = .ri_u,
.data = .{ .ri = .{
.r1 = count_reg,
.imm = 0,
} },
try self.genSetReg(Type.usize, .rcx, len);
_ = try self.addInst(.{
.tag = .movs,
.ops = .string,
.data = .{ .string = .{ .repeat = .rep, .width = .b } },
});
const loop_reloc = try self.asmJccReloc(undefined, .e);
try self.asmRegisterMemory(.mov, tmp_reg.to8(), Memory.sib(.byte, .{
.base = src_addr_reg,
.scale_index = .{
.scale = 1,
.index = index_reg,
},
.disp = 0,
}));
try self.asmMemoryRegister(.mov, Memory.sib(.byte, .{
.base = dst_addr_reg,
.scale_index = .{
.scale = 1,
.index = index_reg,
},
.disp = 0,
}), tmp_reg.to8());
try self.asmRegisterImmediate(.add, index_reg, Immediate.u(1));
try self.asmRegisterImmediate(.sub, count_reg, Immediate.u(1));
_ = try self.asmJmpReloc(loop_start);
try self.performReloc(loop_reloc);
}
fn genInlineMemset(
@ -5698,28 +5684,20 @@ fn genInlineMemset(
len: MCValue,
opts: InlineMemcpyOpts,
) InnerError!void {
const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg|
self.register_manager.lockReg(reg)
else
null;
defer if (ssbase_lock) |reg| self.register_manager.unlockReg(reg);
const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg|
self.register_manager.lockReg(reg)
else
null;
defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock);
const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp);
const addr_reg = regs[0];
const index_reg = regs[1].to64();
try self.spillRegisters(&.{ .rdi, .al, .rcx });
switch (dst_ptr) {
.memory, .linker_load => {
try self.loadMemPtrIntoRegister(addr_reg, Type.usize, dst_ptr);
try self.loadMemPtrIntoRegister(.rdi, Type.usize, dst_ptr);
},
.ptr_stack_offset, .stack_offset => |off| {
try self.asmRegisterMemory(.lea, addr_reg.to64(), Memory.sib(.qword, .{
try self.asmRegisterMemory(.lea, .rdi, Memory.sib(.qword, .{
.base = opts.dest_stack_base orelse .rbp,
.disp = -off,
}));
@ -5727,48 +5705,22 @@ fn genInlineMemset(
.register => |reg| {
try self.asmRegisterRegister(
.mov,
registerAlias(addr_reg, @intCast(u32, @divExact(reg.bitSize(), 8))),
registerAlias(.rdi, @intCast(u32, @divExact(reg.bitSize(), 8))),
reg,
);
},
else => {
return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr});
return self.fail("TODO implement memset for setting stack when dest is {}", .{dst_ptr});
},
}
try self.genSetReg(Type.usize, index_reg, len);
try self.genBinOpMir(.sub, Type.usize, .{ .register = index_reg }, .{ .immediate = 1 });
const loop_start = try self.addInst(.{
.tag = .cmp,
.ops = .ri_s,
.data = .{ .ri = .{
.r1 = index_reg,
.imm = @bitCast(u32, @as(i32, -1)),
} },
try self.genSetReg(Type.u8, .al, value);
try self.genSetReg(Type.usize, .rcx, len);
_ = try self.addInst(.{
.tag = .stos,
.ops = .string,
.data = .{ .string = .{ .repeat = .rep, .width = .b } },
});
const loop_reloc = try self.asmJccReloc(undefined, .e);
switch (value) {
.immediate => |x| {
if (x > math.maxInt(i32)) {
return self.fail("TODO inline memset for value immediate larger than 32bits", .{});
}
try self.asmMemoryImmediate(.mov, Memory.sib(.byte, .{
.base = addr_reg,
.scale_index = .{
.scale = 1,
.index = index_reg,
},
.disp = 0,
}), Immediate.u(@intCast(u8, x)));
},
else => return self.fail("TODO inline memset for value of type {}", .{value}),
}
try self.asmRegisterImmediate(.sub, index_reg, Immediate.u(1));
_ = try self.asmJmpReloc(loop_start);
try self.performReloc(loop_reloc);
}
fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void {
@ -5788,8 +5740,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
},
.unreach, .none => return, // Nothing to do.
.undef => {
if (!self.wantSafety())
return; // The already existing value will do just fine.
if (!self.wantSafety()) return; // The already existing value will do just fine.
// Write the debug undefined value.
switch (registerAlias(reg, abi_size).bitSize()) {
8 => return self.genSetReg(ty, reg, .{ .immediate = 0xaa }),
@ -5802,27 +5753,27 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
.eflags => |cc| {
return self.asmSetccRegister(reg.to8(), cc);
},
.immediate => |x| {
if (x == 0) {
.immediate => |imm| {
if (imm == 0) {
// 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
// register is the fastest way to zero a register.
return self.asmRegisterRegister(.xor, reg.to32(), reg.to32());
try self.asmRegisterRegister(.xor, reg.to32(), reg.to32());
} else if (abi_size > 4 and math.cast(u32, imm) != null) {
// 32-bit moves zero-extend to 64-bit.
try self.asmRegisterImmediate(.mov, reg.to32(), Immediate.u(imm));
} else if (abi_size <= 4 and @bitCast(i64, imm) < 0) {
try self.asmRegisterImmediate(
.mov,
registerAlias(reg, abi_size),
Immediate.s(@intCast(i32, @bitCast(i64, imm))),
);
} else {
try self.asmRegisterImmediate(
.mov,
registerAlias(reg, abi_size),
Immediate.u(imm),
);
}
if (ty.isSignedInt()) {
const signed_x = @bitCast(i64, x);
if (math.minInt(i32) <= signed_x and signed_x <= math.maxInt(i32)) {
return self.asmRegisterImmediate(
.mov,
registerAlias(reg, abi_size),
Immediate.s(@intCast(i32, signed_x)),
);
}
}
return self.asmRegisterImmediate(
.mov,
registerAlias(reg, abi_size),
Immediate.u(x),
);
},
.register => |src_reg| {
// If the registers are the same, nothing to do.
@ -6136,7 +6087,7 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void {
_ = inst;
return self.fail("TODO implement x86 airAtomicRaw", .{});
return self.fail("TODO implement x86 airAtomicRmw", .{});
}
fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
@ -6177,7 +6128,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index) !void {
try self.genInlineMemset(dst_ptr, src_val, len, .{});
return self.finishAir(inst, .none, .{ pl_op.operand, .none, .none });
return self.finishAir(inst, .none, .{ pl_op.operand, extra.lhs, extra.rhs });
}
fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
@ -6229,7 +6180,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
try self.genInlineMemcpy(dst_ptr, src, len, .{});
return self.finishAir(inst, .none, .{ pl_op.operand, .none, .none });
return self.finishAir(inst, .none, .{ pl_op.operand, extra.lhs, extra.rhs });
}
fn airTagName(self: *Self, inst: Air.Inst.Index) !void {

View File

@ -130,6 +130,13 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
.ucomisd,
=> try emit.mirEncodeGeneric(tag, inst),
.cmps,
.lods,
.movs,
.scas,
.stos,
=> try emit.mirString(tag, inst),
.jmp_reloc => try emit.mirJmpReloc(inst),
.call_extern => try emit.mirCallExtern(inst),
@ -183,18 +190,8 @@ fn fixupRelocs(emit: *Emit) InnerError!void {
}
}
fn encode(emit: *Emit, mnemonic: Instruction.Mnemonic, ops: struct {
op1: Instruction.Operand = .none,
op2: Instruction.Operand = .none,
op3: Instruction.Operand = .none,
op4: Instruction.Operand = .none,
}) InnerError!void {
const inst = try Instruction.new(mnemonic, .{
.op1 = ops.op1,
.op2 = ops.op2,
.op3 = ops.op3,
.op4 = ops.op4,
});
fn encode(emit: *Emit, mnemonic: Instruction.Mnemonic, ops: Instruction.Init) InnerError!void {
const inst = try Instruction.new(mnemonic, ops);
return inst.encode(emit.code.writer());
}
@ -318,6 +315,28 @@ fn mirEncodeGeneric(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerE
});
}
/// Lowers a string Mir instruction (`cmps`, `lods`, `movs`, `scas`, `stos`).
///
/// The instruction must use the `.string` ops form; its `string` payload
/// supplies the element width and the repeat prefix. The concrete mnemonic is
/// formed by appending the width tag name to the Mir tag name (for example
/// `movs` with width `b` selects `movsb`), and the repeat value is mapped to
/// the `Instruction.Prefix` member with the same tag name. The instruction is
/// then encoded with that prefix and no explicit operands.
fn mirString(emit: *Emit, tag: Mir.Inst.Tag, inst: Mir.Inst.Index) InnerError!void {
    // Only the `.string` operand form is valid for these tags.
    if (emit.mir.instructions.items(.ops)[inst] != .string) unreachable;

    const string = emit.mir.instructions.items(.data)[inst].string;

    // Both switches use inline prongs so the tag names are comptime-known and
    // can be concatenated / looked up with `@field`.
    const mnemonic = switch (tag) {
        inline .cmps, .lods, .movs, .scas, .stos => |base| switch (string.width) {
            inline else => |width| @field(
                Instruction.Mnemonic,
                @tagName(base) ++ @tagName(width),
            ),
        },
        else => unreachable,
    };
    const prefix = switch (string.repeat) {
        inline else => |repeat| @field(Instruction.Prefix, @tagName(repeat)),
    };

    return emit.encode(mnemonic, .{ .prefix = prefix });
}
fn mirMovMoffs(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
const ops = emit.mir.instructions.items(.ops)[inst];
const payload = emit.mir.instructions.items(.data)[inst].payload;
@ -377,10 +396,9 @@ fn mirMovsx(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
}
fn mnemonicFromConditionCode(comptime basename: []const u8, cc: bits.Condition) Instruction.Mnemonic {
inline for (@typeInfo(bits.Condition).Enum.fields) |field| {
if (mem.eql(u8, field.name, @tagName(cc)))
return @field(Instruction.Mnemonic, basename ++ field.name);
} else unreachable;
return switch (cc) {
inline else => |comptime_cc| @field(Instruction.Mnemonic, basename ++ @tagName(comptime_cc)),
};
}
fn mirCmovcc(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {

View File

@ -24,12 +24,7 @@ opc: [7]u8,
modrm_ext: u3,
mode: Mode,
pub fn findByMnemonic(mnemonic: Mnemonic, args: struct {
op1: Instruction.Operand,
op2: Instruction.Operand,
op3: Instruction.Operand,
op4: Instruction.Operand,
}) !?Encoding {
pub fn findByMnemonic(mnemonic: Mnemonic, args: Instruction.Init) !?Encoding {
const input_op1 = Op.fromOperand(args.op1);
const input_op2 = Op.fromOperand(args.op2);
const input_op3 = Op.fromOperand(args.op3);
@ -109,17 +104,13 @@ pub fn findByMnemonic(mnemonic: Mnemonic, args: struct {
if (count == 1) return candidates[0];
const EncodingLength = struct {
fn estimate(encoding: Encoding, params: struct {
op1: Instruction.Operand,
op2: Instruction.Operand,
op3: Instruction.Operand,
op4: Instruction.Operand,
}) usize {
fn estimate(encoding: Encoding, params: Instruction.Init) usize {
var inst = Instruction{
.op1 = params.op1,
.op2 = params.op2,
.op3 = params.op3,
.op4 = params.op4,
.prefix = params.prefix,
.encoding = encoding,
};
var cwriter = std.io.countingWriter(std.io.null_writer);
@ -140,12 +131,7 @@ pub fn findByMnemonic(mnemonic: Mnemonic, args: struct {
else => {},
}
const len = EncodingLength.estimate(candidate, .{
.op1 = args.op1,
.op2 = args.op2,
.op3 = args.op3,
.op4 = args.op4,
});
const len = EncodingLength.estimate(candidate, args);
const current = shortest_encoding orelse {
shortest_encoding = .{ .index = i, .len = len };
continue;
@ -228,7 +214,11 @@ pub fn modRmExt(encoding: Encoding) u3 {
}
pub fn operandBitSize(encoding: Encoding) u64 {
if (encoding.mode == .long) return 64;
switch (encoding.mode) {
.short => return 16,
.long => return 64,
else => {},
}
const bit_size: u64 = switch (encoding.op_en) {
.np => switch (encoding.op1) {
.o16 => 16,
@ -317,10 +307,13 @@ pub const Mnemonic = enum {
// zig fmt: off
// General-purpose
adc, add, @"and",
call, cbw, cwde, cdqe, cwd, cdq, cqo, cmp,
call, cbw, cdq, cdqe,
cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna,
cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno,
cmovnp, cmovns, cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz,
cmp,
cmps, cmpsb, cmpsd, cmpsq, cmpsw,
cqo, cwd, cwde,
div,
fisttp, fld,
idiv, imul, int3,
@ -328,15 +321,21 @@ pub const Mnemonic = enum {
jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz,
jmp,
lea,
mov, movsx, movsxd, movzx, mul,
lods, lodsb, lodsd, lodsq, lodsw,
mov,
movs, movsb, movsd, movsq, movsw,
movsx, movsxd, movzx, mul,
nop,
@"or",
pop, push,
ret,
sal, sar, sbb, shl, shr, sub, syscall,
sal, sar, sbb,
scas, scasb, scasd, scasq, scasw,
shl, shr, sub, syscall,
seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns,
setnz, seto, setp, setpe, setpo, sets, setz,
stos, stosb, stosd, stosq, stosw,
@"test",
ud2,
xor,
@ -351,10 +350,10 @@ pub const Mnemonic = enum {
ucomiss,
// SSE2
addsd,
cmpsd,
//cmpsd,
divsd,
maxsd, minsd,
movq, movsd,
movq, //movsd,
mulsd,
subsd,
ucomisd,
@ -591,6 +590,7 @@ pub const Op = enum {
pub const Mode = enum {
none,
short,
fpu,
rex,
long,

View File

@ -150,6 +150,17 @@ pub const Inst = struct {
/// Unordered compare scalar double-precision floating-point values
ucomisd,
/// Compare string operands
cmps,
/// Load string
lods,
/// Move data from string to string
movs,
/// Scan string
scas,
/// Store string
stos,
/// Conditional move
cmovcc,
/// Conditional jump
@ -268,6 +279,30 @@ pub const Inst = struct {
/// Memory (RIP), register operands.
/// Uses `rx` payload with extra data of type `MemoryRip`.
mr_rip,
/// Single memory (SIB) operand with lock prefix.
/// Uses `payload` with extra data of type `MemorySib`.
lock_m_sib,
/// Single memory (RIP) operand with lock prefix.
/// Uses `payload` with extra data of type `MemoryRip`.
lock_m_rip,
/// Memory (SIB), immediate (unsigned) operands with lock prefix.
/// Uses `xi` payload with extra data of type `MemorySib`.
lock_mi_u_sib,
/// Memory (RIP), immediate (unsigned) operands with lock prefix.
/// Uses `xi` payload with extra data of type `MemoryRip`.
lock_mi_u_rip,
/// Memory (SIB), immediate (sign-extend) operands with lock prefix.
/// Uses `xi` payload with extra data of type `MemorySib`.
lock_mi_s_sib,
/// Memory (RIP), immediate (sign-extend) operands with lock prefix.
/// Uses `xi` payload with extra data of type `MemoryRip`.
lock_mi_s_rip,
/// Memory (SIB), register operands with lock prefix.
/// Uses `rx` payload with extra data of type `MemorySib`.
lock_mr_sib,
/// Memory (RIP), register operands with lock prefix.
/// Uses `rx` payload with extra data of type `MemoryRip`.
lock_mr_rip,
/// Rax, Memory moffs.
/// Uses `payload` with extra data of type `MemoryMoffs`.
rax_moffs,
@ -280,6 +315,9 @@ pub const Inst = struct {
/// References another Mir instruction directly with condition code (CC).
/// Uses `inst_cc` payload.
inst_cc,
/// String repeat and width
/// Uses `string` payload.
string,
/// Uses `reloc` payload.
reloc,
/// Linker relocation - GOT indirection.
@ -353,6 +391,11 @@ pub const Inst = struct {
payload: u32,
imm: u32,
},
/// String instruction prefix and width.
string: struct {
repeat: bits.StringRepeat,
width: bits.StringWidth,
},
/// Relocation for the linker where:
/// * `atom_index` is the index of the source
/// * `sym_index` is the index of the target

View File

@ -6,6 +6,9 @@ const Allocator = std.mem.Allocator;
const ArrayList = std.ArrayList;
const DW = std.dwarf;
/// Repeat prefix requested for a string instruction; `none` requests no prefix.
/// Each member is translated by tag name into the `Instruction.Prefix` member
/// of the same name when the instruction is lowered.
pub const StringRepeat = enum(u3) { none, rep, repe, repz, repne, repnz };
/// Element width of a string instruction. The member's tag name is appended to
/// the base mnemonic when the instruction is lowered (e.g. `stos` + `b` gives
/// `stosb`, `movs` + `q` gives `movsq`).
pub const StringWidth = enum(u2) { b, w, d, q };
/// EFLAGS condition codes
pub const Condition = enum(u5) {
/// above

View File

@ -15,10 +15,21 @@ pub const Instruction = struct {
op2: Operand = .none,
op3: Operand = .none,
op4: Operand = .none,
prefix: Prefix = .none,
encoding: Encoding,
pub const Mnemonic = Encoding.Mnemonic;
/// Optional prefix attached to an instruction. When the instruction is
/// encoded, each member other than `none` enables one of the legacy prefix
/// flags (`prefix_f0`, `prefix_f2`, `prefix_f3`).
pub const Prefix = enum(u3) {
/// No prefix flag is set.
none,
/// Sets the `prefix_f0` legacy prefix flag.
lock,
/// Sets the `prefix_f3` legacy prefix flag.
rep,
/// Sets the `prefix_f3` legacy prefix flag (same flag as `rep`).
repe,
/// Sets the `prefix_f3` legacy prefix flag (same flag as `rep`).
repz,
/// Sets the `prefix_f2` legacy prefix flag.
repne,
/// Sets the `prefix_f2` legacy prefix flag (same flag as `repne`).
repnz,
};
pub const Operand = union(enum) {
none,
reg: Register,
@ -96,18 +107,16 @@ pub const Instruction = struct {
}
};
pub fn new(mnemonic: Mnemonic, args: struct {
pub const Init = struct {
prefix: Prefix = .none,
op1: Operand = .none,
op2: Operand = .none,
op3: Operand = .none,
op4: Operand = .none,
}) !Instruction {
const encoding = (try Encoding.findByMnemonic(mnemonic, .{
.op1 = args.op1,
.op2 = args.op2,
.op3 = args.op3,
.op4 = args.op4,
})) orelse {
};
pub fn new(mnemonic: Mnemonic, args: Init) !Instruction {
const encoding = (try Encoding.findByMnemonic(mnemonic, args)) orelse {
log.debug("no encoding found for: {s} {s} {s} {s} {s}", .{
@tagName(mnemonic),
@tagName(Encoding.Op.fromOperand(args.op1)),
@ -119,6 +128,7 @@ pub const Instruction = struct {
};
log.debug("selected encoding: {}", .{encoding});
return .{
.prefix = args.prefix,
.op1 = args.op1,
.op2 = args.op2,
.op3 = args.op3,
@ -128,6 +138,7 @@ pub const Instruction = struct {
}
pub fn fmtPrint(inst: Instruction, writer: anytype) !void {
if (inst.prefix != .none) try writer.print("{s} ", .{@tagName(inst.prefix)});
try writer.print("{s}", .{@tagName(inst.encoding.mnemonic)});
const ops = [_]struct { Operand, Encoding.Op }{
.{ inst.op1, inst.encoding.op1 },
@ -215,6 +226,14 @@ pub const Instruction = struct {
const op_en = enc.op_en;
var legacy = LegacyPrefixes{};
switch (inst.prefix) {
.none => {},
.lock => legacy.prefix_f0 = true,
.repne, .repnz => legacy.prefix_f2 = true,
.rep, .repe, .repz => legacy.prefix_f3 = true,
}
if (enc.mode == .none) {
const bit_size = enc.operandBitSize();
if (bit_size == 16) {
@ -811,15 +830,11 @@ const TestEncode = struct {
buffer: [32]u8 = undefined,
index: usize = 0,
fn encode(enc: *TestEncode, mnemonic: Instruction.Mnemonic, args: struct {
op1: Instruction.Operand = .none,
op2: Instruction.Operand = .none,
op3: Instruction.Operand = .none,
op4: Instruction.Operand = .none,
}) !void {
fn encode(enc: *TestEncode, mnemonic: Instruction.Mnemonic, args: Instruction.Init) !void {
var stream = std.io.fixedBufferStream(&enc.buffer);
var count_writer = std.io.countingWriter(stream.writer());
const inst = try Instruction.new(mnemonic, .{
.prefix = args.prefix,
.op1 = args.op1,
.op2 = args.op2,
.op3 = args.op3,
@ -1447,18 +1462,8 @@ test "lower NP encoding" {
try expectEqualHexStrings("\x0f\x05", enc.code(), "syscall");
}
fn invalidInstruction(mnemonic: Instruction.Mnemonic, args: struct {
op1: Instruction.Operand = .none,
op2: Instruction.Operand = .none,
op3: Instruction.Operand = .none,
op4: Instruction.Operand = .none,
}) !void {
const err = Instruction.new(mnemonic, .{
.op1 = args.op1,
.op2 = args.op2,
.op3 = args.op3,
.op4 = args.op4,
});
fn invalidInstruction(mnemonic: Instruction.Mnemonic, args: Instruction.Init) !void {
const err = Instruction.new(mnemonic, args);
try testing.expectError(error.InvalidInstruction, err);
}
@ -1479,18 +1484,8 @@ test "invalid instruction" {
try invalidInstruction(.push, .{ .op1 = .{ .imm = Immediate.u(0x1000000000000000) } });
}
fn cannotEncode(mnemonic: Instruction.Mnemonic, args: struct {
op1: Instruction.Operand = .none,
op2: Instruction.Operand = .none,
op3: Instruction.Operand = .none,
op4: Instruction.Operand = .none,
}) !void {
try testing.expectError(error.CannotEncode, Instruction.new(mnemonic, .{
.op1 = args.op1,
.op2 = args.op2,
.op3 = args.op3,
.op4 = args.op4,
}));
fn cannotEncode(mnemonic: Instruction.Mnemonic, args: Instruction.Init) !void {
try testing.expectError(error.CannotEncode, Instruction.new(mnemonic, args));
}
test "cannot encode" {

View File

@ -207,6 +207,15 @@ pub const table = &[_]Entry{
.{ .cmp, .rm, .r32, .rm32, .none, .none, &.{ 0x3b }, 0, .none },
.{ .cmp, .rm, .r64, .rm64, .none, .none, &.{ 0x3b }, 0, .long },
.{ .cmps, .np, .m8, .m8, .none, .none, &.{ 0xa6 }, 0, .none },
.{ .cmps, .np, .m16, .m16, .none, .none, &.{ 0xa7 }, 0, .none },
.{ .cmps, .np, .m32, .m32, .none, .none, &.{ 0xa7 }, 0, .none },
.{ .cmps, .np, .m64, .m64, .none, .none, &.{ 0xa7 }, 0, .long },
.{ .cmpsb, .np, .none, .none, .none, .none, &.{ 0xa6 }, 0, .none },
.{ .cmpsw, .np, .none, .none, .none, .none, &.{ 0xa7 }, 0, .short },
.{ .cmpsd, .np, .none, .none, .none, .none, &.{ 0xa7 }, 0, .none },
.{ .cmpsq, .np, .none, .none, .none, .none, &.{ 0xa7 }, 0, .long },
.{ .div, .m, .rm8, .none, .none, .none, &.{ 0xf6 }, 6, .none },
.{ .div, .m, .rm8, .none, .none, .none, &.{ 0xf6 }, 6, .rex },
.{ .div, .m, .rm16, .none, .none, .none, &.{ 0xf7 }, 6, .none },
@ -283,6 +292,15 @@ pub const table = &[_]Entry{
.{ .lea, .rm, .r32, .m, .none, .none, &.{ 0x8d }, 0, .none },
.{ .lea, .rm, .r64, .m, .none, .none, &.{ 0x8d }, 0, .long },
.{ .lods, .np, .m8, .none, .none, .none, &.{ 0xac }, 0, .none },
.{ .lods, .np, .m16, .none, .none, .none, &.{ 0xad }, 0, .none },
.{ .lods, .np, .m32, .none, .none, .none, &.{ 0xad }, 0, .none },
.{ .lods, .np, .m64, .none, .none, .none, &.{ 0xad }, 0, .long },
.{ .lodsb, .np, .none, .none, .none, .none, &.{ 0xac }, 0, .none },
.{ .lodsw, .np, .none, .none, .none, .none, &.{ 0xad }, 0, .short },
.{ .lodsd, .np, .none, .none, .none, .none, &.{ 0xad }, 0, .none },
.{ .lodsq, .np, .none, .none, .none, .none, &.{ 0xad }, 0, .long },
.{ .mov, .mr, .rm8, .r8, .none, .none, &.{ 0x88 }, 0, .none },
.{ .mov, .mr, .rm8, .r8, .none, .none, &.{ 0x88 }, 0, .rex },
.{ .mov, .mr, .rm16, .r16, .none, .none, &.{ 0x89 }, 0, .none },
@ -316,6 +334,15 @@ pub const table = &[_]Entry{
.{ .mov, .mi, .rm32, .imm32, .none, .none, &.{ 0xc7 }, 0, .none },
.{ .mov, .mi, .rm64, .imm32s, .none, .none, &.{ 0xc7 }, 0, .long },
.{ .movs, .np, .m8, .m8, .none, .none, &.{ 0xa4 }, 0, .none },
.{ .movs, .np, .m16, .m16, .none, .none, &.{ 0xa5 }, 0, .none },
.{ .movs, .np, .m32, .m32, .none, .none, &.{ 0xa5 }, 0, .none },
.{ .movs, .np, .m64, .m64, .none, .none, &.{ 0xa5 }, 0, .long },
.{ .movsb, .np, .none, .none, .none, .none, &.{ 0xa4 }, 0, .none },
.{ .movsw, .np, .none, .none, .none, .none, &.{ 0xa5 }, 0, .short },
.{ .movsd, .np, .none, .none, .none, .none, &.{ 0xa5 }, 0, .none },
.{ .movsq, .np, .none, .none, .none, .none, &.{ 0xa5 }, 0, .long },
.{ .movsx, .rm, .r16, .rm8, .none, .none, &.{ 0x0f, 0xbe }, 0, .none },
.{ .movsx, .rm, .r16, .rm8, .none, .none, &.{ 0x0f, 0xbe }, 0, .rex },
.{ .movsx, .rm, .r32, .rm8, .none, .none, &.{ 0x0f, 0xbe }, 0, .none },
@ -435,6 +462,15 @@ pub const table = &[_]Entry{
.{ .sbb, .rm, .r32, .rm32, .none, .none, &.{ 0x1b }, 0, .none },
.{ .sbb, .rm, .r64, .rm64, .none, .none, &.{ 0x1b }, 0, .long },
.{ .scas, .np, .m8, .none, .none, .none, &.{ 0xae }, 0, .none },
.{ .scas, .np, .m16, .none, .none, .none, &.{ 0xaf }, 0, .none },
.{ .scas, .np, .m32, .none, .none, .none, &.{ 0xaf }, 0, .none },
.{ .scas, .np, .m64, .none, .none, .none, &.{ 0xaf }, 0, .long },
.{ .scasb, .np, .none, .none, .none, .none, &.{ 0xae }, 0, .none },
.{ .scasw, .np, .none, .none, .none, .none, &.{ 0xaf }, 0, .short },
.{ .scasd, .np, .none, .none, .none, .none, &.{ 0xaf }, 0, .none },
.{ .scasq, .np, .none, .none, .none, .none, &.{ 0xaf }, 0, .long },
.{ .seta, .m, .rm8, .none, .none, .none, &.{ 0x0f, 0x97 }, 0, .none },
.{ .seta, .m, .rm8, .none, .none, .none, &.{ 0x0f, 0x97 }, 0, .rex },
.{ .setae, .m, .rm8, .none, .none, .none, &.{ 0x0f, 0x93 }, 0, .none },
@ -528,6 +564,15 @@ pub const table = &[_]Entry{
.{ .shr, .mi, .rm32, .imm8, .none, .none, &.{ 0xc1 }, 5, .none },
.{ .shr, .mi, .rm64, .imm8, .none, .none, &.{ 0xc1 }, 5, .long },
.{ .stos, .np, .m8, .none, .none, .none, &.{ 0xaa }, 0, .none },
.{ .stos, .np, .m16, .none, .none, .none, &.{ 0xab }, 0, .none },
.{ .stos, .np, .m32, .none, .none, .none, &.{ 0xab }, 0, .none },
.{ .stos, .np, .m64, .none, .none, .none, &.{ 0xab }, 0, .long },
.{ .stosb, .np, .none, .none, .none, .none, &.{ 0xaa }, 0, .none },
.{ .stosw, .np, .none, .none, .none, .none, &.{ 0xab }, 0, .short },
.{ .stosd, .np, .none, .none, .none, .none, &.{ 0xab }, 0, .none },
.{ .stosq, .np, .none, .none, .none, .none, &.{ 0xab }, 0, .long },
.{ .sub, .zi, .al, .imm8, .none, .none, &.{ 0x2c }, 0, .none },
.{ .sub, .zi, .ax, .imm16, .none, .none, &.{ 0x2d }, 0, .none },
.{ .sub, .zi, .eax, .imm32, .none, .none, &.{ 0x2d }, 0, .none },