Merge pull request #8474 from gracefuu/grace/encode-instruction
stage2 x86_64: encoding helpers, fix bugs
commit b88d381dec
src/Module.zig

@@ -4330,6 +4330,33 @@ pub fn intSub(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
     }
 }
 
+pub fn intMul(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
+    // TODO is this a performance issue? maybe we should try the operation without
+    // resorting to BigInt first.
+    var lhs_space: Value.BigIntSpace = undefined;
+    var rhs_space: Value.BigIntSpace = undefined;
+    const lhs_bigint = lhs.toBigInt(&lhs_space);
+    const rhs_bigint = rhs.toBigInt(&rhs_space);
+    const limbs = try allocator.alloc(
+        std.math.big.Limb,
+        lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1,
+    );
+    var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+    var limbs_buffer = try allocator.alloc(
+        std.math.big.Limb,
+        std.math.big.int.calcMulLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
+    );
+    defer allocator.free(limbs_buffer);
+    result_bigint.mul(lhs_bigint, rhs_bigint, limbs_buffer, allocator);
+    const result_limbs = result_bigint.limbs[0..result_bigint.len];
+
+    if (result_bigint.positive) {
+        return Value.Tag.int_big_positive.create(allocator, result_limbs);
+    } else {
+        return Value.Tag.int_big_negative.create(allocator, result_limbs);
+    }
+}
+
 pub fn floatAdd(
     arena: *Allocator,
     float_type: Type,
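Aside (not part of the commit): why `lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1` limbs always suffice. A k-limb big integer is below 2^(64k) on a 64-bit target, so an m-limb value times an n-limb value is below 2^(64(m+n)) and fits in m + n limbs; the extra limb is slack. A minimal self-contained check of the smallest case:

    const std = @import("std");

    test "product limb bound, smallest case" {
        // (2^64 - 1)^2 < 2^128: a 1-limb times a 1-limb value fits in
        // two 64-bit limbs, and m + n limbs suffice in general.
        const max: u128 = std.math.maxInt(u64);
        const product: u128 = max * max;
        std.debug.assert((product >> 64) > 0); // the second limb is in use
    }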
@@ -4396,6 +4423,39 @@ pub fn floatSub(
     }
 }
 
+pub fn floatMul(
+    arena: *Allocator,
+    float_type: Type,
+    src: LazySrcLoc,
+    lhs: Value,
+    rhs: Value,
+) !Value {
+    switch (float_type.tag()) {
+        .f16 => {
+            @panic("TODO add __trunctfhf2 to compiler-rt");
+            //const lhs_val = lhs.toFloat(f16);
+            //const rhs_val = rhs.toFloat(f16);
+            //return Value.Tag.float_16.create(arena, lhs_val * rhs_val);
+        },
+        .f32 => {
+            const lhs_val = lhs.toFloat(f32);
+            const rhs_val = rhs.toFloat(f32);
+            return Value.Tag.float_32.create(arena, lhs_val * rhs_val);
+        },
+        .f64 => {
+            const lhs_val = lhs.toFloat(f64);
+            const rhs_val = rhs.toFloat(f64);
+            return Value.Tag.float_64.create(arena, lhs_val * rhs_val);
+        },
+        .f128, .comptime_float, .c_longdouble => {
+            const lhs_val = lhs.toFloat(f128);
+            const rhs_val = rhs.toFloat(f128);
+            return Value.Tag.float_128.create(arena, lhs_val * rhs_val);
+        },
+        else => unreachable,
+    }
+}
+
 pub fn simplePtrType(
     mod: *Module,
     arena: *Allocator,
src/Sema.zig
@@ -3864,10 +3864,15 @@ fn analyzeArithmetic(
         // in case rhs is 0, simply return lhs without doing any calculations
         // TODO Once division is implemented we should throw an error when dividing by 0.
         if (rhs_val.compareWithZero(.eq)) {
-            return sema.mod.constInst(sema.arena, src, .{
-                .ty = scalar_type,
-                .val = lhs_val,
-            });
+            switch (zir_tag) {
+                .add, .addwrap, .sub, .subwrap => {
+                    return sema.mod.constInst(sema.arena, src, .{
+                        .ty = scalar_type,
+                        .val = lhs_val,
+                    });
+                },
+                else => {},
+            }
         }
 
         const value = switch (zir_tag) {
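The zero special case above is only sound for addition and subtraction, which is exactly what the new switch encodes. A minimal comptime sketch (not part of the commit) of the folding rules being preserved:

    const std = @import("std");

    test "zero rhs folds differently per op" {
        comptime {
            const x: i32 = 7;
            std.debug.assert(x + 0 == 7); // lhs is the correct fold
            std.debug.assert(x - 0 == 7); // lhs is the correct fold
            std.debug.assert(x * 0 == 0); // folding to lhs would be wrong here
        }
    }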
@@ -3885,6 +3890,13 @@ fn analyzeArithmetic(
                     try Module.floatSub(sema.arena, scalar_type, src, lhs_val, rhs_val);
                 break :blk val;
             },
+            .mul => blk: {
+                const val = if (is_int)
+                    try Module.intMul(sema.arena, lhs_val, rhs_val)
+                else
+                    try Module.floatMul(sema.arena, scalar_type, src, lhs_val, rhs_val);
+                break :blk val;
+            },
             else => return sema.mod.fail(&block.base, src, "TODO Implement arithmetic operand '{s}'", .{@tagName(zir_tag)}),
         };
 
src/codegen.zig
@@ -20,6 +20,8 @@ const build_options = @import("build_options");
 const LazySrcLoc = Module.LazySrcLoc;
 const RegisterManager = @import("register_manager.zig").RegisterManager;
 
+const X8664Encoder = @import("codegen/x86_64.zig").Encoder;
+
 /// The codegen-related data that is stored in `ir.Inst.Block` instructions.
 pub const BlockData = struct {
     relocs: std.ArrayListUnmanaged(Reloc) = undefined,
@@ -1038,7 +1040,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         },
                         .val = Value.initTag(.bool_true),
                     };
-                    return try self.genX8664BinMath(&inst.base, inst.operand, &imm.base, 6, 0x30);
+                    return try self.genX8664BinMath(&inst.base, inst.operand, &imm.base);
                 },
                 .arm, .armeb => {
                     var imm = ir.Inst.Constant{
@@ -1062,7 +1064,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 return MCValue.dead;
             switch (arch) {
                 .x86_64 => {
-                    return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs, 0, 0x00);
+                    return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs);
                 },
                 .arm, .armeb => return try self.genArmBinOp(&inst.base, inst.lhs, inst.rhs, .add),
                 else => return self.fail(inst.base.src, "TODO implement add for {}", .{self.target.cpu.arch}),
@@ -1083,6 +1085,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             if (inst.base.isUnused())
                 return MCValue.dead;
             switch (arch) {
+                .x86_64 => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
                 .arm, .armeb => return try self.genArmMul(&inst.base, inst.lhs, inst.rhs),
                 else => return self.fail(inst.base.src, "TODO implement mul for {}", .{self.target.cpu.arch}),
             }
@@ -1361,7 +1364,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 return MCValue.dead;
             switch (arch) {
                 .x86_64 => {
-                    return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs, 5, 0x28);
+                    return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs);
                 },
                 .arm, .armeb => return try self.genArmBinOp(&inst.base, inst.lhs, inst.rhs, .sub),
                 else => return self.fail(inst.base.src, "TODO implement sub for {}", .{self.target.cpu.arch}),
@@ -1506,8 +1509,20 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             return dst_mcv;
         }
 
         /// Perform "binary" operators, excluding comparisons.
         /// Currently, the following ops are supported:
         /// ADD, SUB, XOR, OR, AND
-        fn genX8664BinMath(self: *Self, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, opx: u8, mr: u8) !MCValue {
+        fn genX8664BinMath(self: *Self, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst) !MCValue {
+            // We'll handle these ops in a few steps.
+            // 1) Prepare an output location (register or memory)
+            //    This location will be the location of the operand that dies (if one exists)
+            //    or just a temporary register (if one doesn't exist)
+            // 2) Perform the op with the other argument
+            // 3) Sometimes, the output location is memory but the op doesn't support it.
+            //    In this case, copy that location to a register, then perform the op to that register instead.
+            //
+            // TODO: make this algorithm less bad
 
             try self.code.ensureCapacity(self.code.items.len + 8);
 
             const lhs = try self.resolveInst(op_lhs);
@@ -1568,18 +1583,109 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 else => {},
             }
 
-            try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, opx, mr);
+            // Now for step 2, we perform the actual op
+            switch (inst.tag) {
+                // TODO: Generate wrapping and non-wrapping versions separately
+                .add, .addwrap => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 0, 0x00),
+                .bool_or, .bit_or => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 1, 0x08),
+                .bool_and, .bit_and => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 4, 0x20),
+                .sub, .subwrap => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 5, 0x28),
+                .xor, .not => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 6, 0x30),
+
+                .mul, .mulwrap => try self.genX8664Imul(inst.src, inst.ty, dst_mcv, src_mcv),
+                else => unreachable,
+            }
 
             return dst_mcv;
         }
 
+        /// Wrap over Instruction.encodeInto to translate errors
+        fn encodeX8664Instruction(
+            self: *Self,
+            src: LazySrcLoc,
+            inst: Instruction,
+        ) !void {
+            inst.encodeInto(self.code) catch |err| {
+                if (err == error.OutOfMemory)
+                    return error.OutOfMemory
+                else
+                    return self.fail(src, "Instruction.encodeInto failed because {s}", .{@errorName(err)});
+            };
+        }
+
+        /// This function encodes a binary operation for x86_64,
+        /// intended for use with the opcode ranges listed below,
+        /// which share the same structure.
+        ///
+        /// Thus not all binary operations can be used here
+        /// -- multiplication needs to be done with imul,
+        /// which doesn't have as convenient an interface.
+        ///
+        /// "opx"-style instructions use the opcode extension field to indicate which instruction to execute:
+        ///
+        /// opx = /0: add
+        /// opx = /1: or
+        /// opx = /2: adc
+        /// opx = /3: sbb
+        /// opx = /4: and
+        /// opx = /5: sub
+        /// opx = /6: xor
+        /// opx = /7: cmp
+        ///
+        /// opcode  | operand shape
+        /// --------+----------------------
+        /// 80 /opx | *r/m8*, imm8
+        /// 81 /opx | *r/m16/32/64*, imm16/32
+        /// 83 /opx | *r/m16/32/64*, imm8
+        ///
+        /// "mr"-style instructions use the low bits of the opcode to indicate the shape of the instruction:
+        ///
+        /// mr = 00: add
+        /// mr = 08: or
+        /// mr = 10: adc
+        /// mr = 18: sbb
+        /// mr = 20: and
+        /// mr = 28: sub
+        /// mr = 30: xor
+        /// mr = 38: cmp
+        ///
+        /// opcode | operand shape
+        /// -------+-------------------------
+        /// mr + 0 | *r/m8*, r8
+        /// mr + 1 | *r/m16/32/64*, r16/32/64
+        /// mr + 2 | *r8*, r/m8
+        /// mr + 3 | *r16/32/64*, r/m16/32/64
+        /// mr + 4 | *AL*, imm8
+        /// mr + 5 | *rAX*, imm16/32
+        ///
+        /// TODO: rotates and shifts share the same structure, so we can potentially implement them
+        /// at a later date with very similar code.
+        /// They have "opx"-style instructions, but no "mr"-style instructions.
+        ///
+        /// opx = /0: rol
+        /// opx = /1: ror
+        /// opx = /2: rcl
+        /// opx = /3: rcr
+        /// opx = /4: shl sal
+        /// opx = /5: shr
+        /// opx = /6: sal shl
+        /// opx = /7: sar
+        ///
+        /// opcode  | operand shape
+        /// --------+------------------
+        /// c0 /opx | *r/m8*, imm8
+        /// c1 /opx | *r/m16/32/64*, imm8
+        /// d0 /opx | *r/m8*, 1
+        /// d1 /opx | *r/m16/32/64*, 1
+        /// d2 /opx | *r/m8*, CL (for context, CL is register 1)
+        /// d3 /opx | *r/m16/32/64*, CL (for context, CL is register 1)
         fn genX8664BinMathCode(
             self: *Self,
             src: LazySrcLoc,
             dst_ty: Type,
             dst_mcv: MCValue,
             src_mcv: MCValue,
-            opx: u8,
+            opx: u3,
             mr: u8,
         ) !void {
             switch (dst_mcv) {
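A standalone sanity check (not part of the commit) of the two opcode tables above, pinning down the bytes of one "mr"-style and one "opx"-style instruction; the expected byte strings can be confirmed with any x86_64 disassembler:

    const std = @import("std");

    test "mr-style and opx-style byte patterns" {
        const mr: u8 = 0x00; // add
        // add rcx, rax -> REX.W, mr + 1, ModR/M mod=11 reg=rax(0) rm=rcx(1)
        std.debug.assert(std.mem.eql(u8, &[_]u8{ 0x48, mr + 1, 0b11_000_001 }, &[_]u8{ 0x48, 0x01, 0xC1 }));
        const opx: u8 = 5; // sub
        // sub rax, 8 -> REX.W, 0x83, ModR/M mod=11 reg=opx rm=rax(0), imm8
        std.debug.assert(std.mem.eql(u8, &[_]u8{ 0x48, 0x83, 0xC0 | (opx << 3), 0x08 }, &[_]u8{ 0x48, 0x83, 0xE8, 0x08 }));
    }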
@@ -1598,31 +1704,85 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     .ptr_stack_offset => unreachable,
                     .ptr_embedded_in_code => unreachable,
                     .register => |src_reg| {
-                        self.rex(.{ .b = dst_reg.isExtended(), .r = src_reg.isExtended(), .w = dst_reg.size() == 64 });
-                        self.code.appendSliceAssumeCapacity(&[_]u8{ mr + 0x1, 0xC0 | (@as(u8, src_reg.id() & 0b111) << 3) | @as(u8, dst_reg.id() & 0b111) });
+                        // for register, register use mr + 1
+                        // addressing mode: *r/m16/32/64*, r16/32/64
+                        const abi_size = dst_ty.abiSize(self.target.*);
+                        const encoder = try X8664Encoder.init(self.code, 3);
+                        encoder.rex(.{
+                            .w = abi_size == 8,
+                            .r = src_reg.isExtended(),
+                            .b = dst_reg.isExtended(),
+                        });
+                        encoder.opcode_1byte(mr + 1);
+                        encoder.modRm_direct(
+                            src_reg.low_id(),
+                            dst_reg.low_id(),
+                        );
                     },
                     .immediate => |imm| {
-                        const imm32 = @intCast(u31, imm); // This case must be handled before calling genX8664BinMathCode.
-                        // 81 /opx id
-                        if (imm32 <= math.maxInt(u7)) {
-                            self.rex(.{ .b = dst_reg.isExtended(), .w = dst_reg.size() == 64 });
-                            self.code.appendSliceAssumeCapacity(&[_]u8{
-                                0x83,
-                                0xC0 | (opx << 3) | @truncate(u3, dst_reg.id()),
-                                @intCast(u8, imm32),
-                            });
-                        } else {
-                            self.rex(.{ .r = dst_reg.isExtended(), .w = dst_reg.size() == 64 });
-                            self.code.appendSliceAssumeCapacity(&[_]u8{
-                                0x81,
-                                0xC0 | (opx << 3) | @truncate(u3, dst_reg.id()),
-                            });
-                            std.mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), imm32);
-                        }
+                        // register, immediate use opx = 81 or 83 addressing modes:
+                        // opx = 81: r/m16/32/64, imm16/32
+                        // opx = 83: r/m16/32/64, imm8
+                        const imm32 = @intCast(i32, imm); // This case must be handled before calling genX8664BinMathCode.
+                        if (imm32 <= math.maxInt(i8)) {
+                            const abi_size = dst_ty.abiSize(self.target.*);
+                            const encoder = try X8664Encoder.init(self.code, 4);
+                            encoder.rex(.{
+                                .w = abi_size == 8,
+                                .b = dst_reg.isExtended(),
+                            });
+                            encoder.opcode_1byte(0x83);
+                            encoder.modRm_direct(
+                                opx,
+                                dst_reg.low_id(),
+                            );
+                            encoder.imm8(@intCast(i8, imm32));
+                        } else {
+                            const abi_size = dst_ty.abiSize(self.target.*);
+                            const encoder = try X8664Encoder.init(self.code, 7);
+                            encoder.rex(.{
+                                .w = abi_size == 8,
+                                .b = dst_reg.isExtended(),
+                            });
+                            encoder.opcode_1byte(0x81);
+                            encoder.modRm_direct(
+                                opx,
+                                dst_reg.low_id(),
+                            );
+                            encoder.imm32(@intCast(i32, imm32));
+                        }
                     },
-                    .embedded_in_code, .memory, .stack_offset => {
+                    .embedded_in_code, .memory => {
                         return self.fail(src, "TODO implement x86 ADD/SUB/CMP source memory", .{});
                     },
+                    .stack_offset => |off| {
+                        // register, indirect use mr + 3
+                        // addressing mode: *r16/32/64*, r/m16/32/64
+                        const abi_size = dst_ty.abiSize(self.target.*);
+                        const adj_off = off + abi_size;
+                        if (off > math.maxInt(i32)) {
+                            return self.fail(src, "stack offset too large", .{});
+                        }
+                        const encoder = try X8664Encoder.init(self.code, 7);
+                        encoder.rex(.{
+                            .w = abi_size == 8,
+                            .r = dst_reg.isExtended(),
+                        });
+                        encoder.opcode_1byte(mr + 3);
+                        if (adj_off <= std.math.maxInt(i8)) {
+                            encoder.modRm_indirectDisp8(
+                                dst_reg.low_id(),
+                                Register.ebp.low_id(),
+                            );
+                            encoder.disp8(-@intCast(i8, adj_off));
+                        } else {
+                            encoder.modRm_indirectDisp32(
+                                dst_reg.low_id(),
+                                Register.ebp.low_id(),
+                            );
+                            encoder.disp32(-@intCast(i32, adj_off));
+                        }
+                    },
                     .compare_flags_unsigned => {
                         return self.fail(src, "TODO implement x86 ADD/SUB/CMP source compare flag (unsigned)", .{});
                     },
@@ -1661,28 +1821,184 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             }
         }
 
+        /// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
+        fn genX8664Imul(
+            self: *Self,
+            src: LazySrcLoc,
+            dst_ty: Type,
+            dst_mcv: MCValue,
+            src_mcv: MCValue,
+        ) !void {
+            switch (dst_mcv) {
+                .none => unreachable,
+                .undef => unreachable,
+                .dead, .unreach, .immediate => unreachable,
+                .compare_flags_unsigned => unreachable,
+                .compare_flags_signed => unreachable,
+                .ptr_stack_offset => unreachable,
+                .ptr_embedded_in_code => unreachable,
+                .register => |dst_reg| {
+                    switch (src_mcv) {
+                        .none => unreachable,
+                        .undef => try self.genSetReg(src, dst_ty, dst_reg, .undef),
+                        .dead, .unreach => unreachable,
+                        .ptr_stack_offset => unreachable,
+                        .ptr_embedded_in_code => unreachable,
+                        .register => |src_reg| {
+                            // register, register
+                            //
+                            // Use the following imul opcode
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            const abi_size = dst_ty.abiSize(self.target.*);
+                            const encoder = try X8664Encoder.init(self.code, 4);
+                            encoder.rex(.{
+                                .w = abi_size == 8,
+                                .r = dst_reg.isExtended(),
+                                .b = src_reg.isExtended(),
+                            });
+                            encoder.opcode_2byte(0x0f, 0xaf);
+                            encoder.modRm_direct(
+                                dst_reg.low_id(),
+                                src_reg.low_id(),
+                            );
+                        },
+                        .immediate => |imm| {
+                            // register, immediate:
+                            // depends on size of immediate.
+                            //
+                            // immediate fits in i8:
+                            // 6B /r ib: IMUL r32/64, r/m32/64, imm8
+                            //
+                            // immediate fits in i32:
+                            // 69 /r id: IMUL r32/64, r/m32/64, imm32
+                            //
+                            // immediate is huge:
+                            // split into 2 instructions
+                            // 1) copy the 64 bit immediate into a tmp register
+                            // 2) perform register,register mul
+                            //    0F AF /r: IMUL r32/64, r/m32/64
+                            if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
+                                const abi_size = dst_ty.abiSize(self.target.*);
+                                const encoder = try X8664Encoder.init(self.code, 4);
+                                encoder.rex(.{
+                                    .w = abi_size == 8,
+                                    .r = dst_reg.isExtended(),
+                                    .b = dst_reg.isExtended(),
+                                });
+                                encoder.opcode_1byte(0x6B);
+                                encoder.modRm_direct(
+                                    dst_reg.low_id(),
+                                    dst_reg.low_id(),
+                                );
+                                encoder.imm8(@intCast(i8, imm));
+                            } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
+                                const abi_size = dst_ty.abiSize(self.target.*);
+                                const encoder = try X8664Encoder.init(self.code, 7);
+                                encoder.rex(.{
+                                    .w = abi_size == 8,
+                                    .r = dst_reg.isExtended(),
+                                    .b = dst_reg.isExtended(),
+                                });
+                                encoder.opcode_1byte(0x69);
+                                encoder.modRm_direct(
+                                    dst_reg.low_id(),
+                                    dst_reg.low_id(),
+                                );
+                                encoder.imm32(@intCast(i32, imm));
+                            } else {
+                                const src_reg = try self.copyToTmpRegister(src, dst_ty, src_mcv);
+                                return self.genX8664Imul(src, dst_ty, dst_mcv, MCValue{ .register = src_reg });
+                            }
+                        },
+                        .embedded_in_code, .memory, .stack_offset => {
+                            return self.fail(src, "TODO implement x86 multiply source memory", .{});
+                        },
+                        .compare_flags_unsigned => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+                        },
+                        .compare_flags_signed => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+                        },
+                    }
+                },
+                .stack_offset => |off| {
+                    switch (src_mcv) {
+                        .none => unreachable,
+                        .undef => return self.genSetStack(src, dst_ty, off, .undef),
+                        .dead, .unreach => unreachable,
+                        .ptr_stack_offset => unreachable,
+                        .ptr_embedded_in_code => unreachable,
+                        .register => |src_reg| {
+                            // copy dst to a register
+                            const dst_reg = try self.copyToTmpRegister(src, dst_ty, dst_mcv);
+                            // multiply into dst_reg
+                            // register, register
+                            // Use the following imul opcode
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            const abi_size = dst_ty.abiSize(self.target.*);
+                            const encoder = try X8664Encoder.init(self.code, 4);
+                            encoder.rex(.{
+                                .w = abi_size == 8,
+                                .r = dst_reg.isExtended(),
+                                .b = src_reg.isExtended(),
+                            });
+                            encoder.opcode_2byte(0x0f, 0xaf);
+                            encoder.modRm_direct(
+                                dst_reg.low_id(),
+                                src_reg.low_id(),
+                            );
+                            // copy dst_reg back out
+                            return self.genSetStack(src, dst_ty, off, MCValue{ .register = dst_reg });
+                        },
+                        .immediate => |imm| {
+                            return self.fail(src, "TODO implement x86 multiply source immediate", .{});
+                        },
+                        .embedded_in_code, .memory, .stack_offset => {
+                            return self.fail(src, "TODO implement x86 multiply source memory", .{});
+                        },
+                        .compare_flags_unsigned => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+                        },
+                        .compare_flags_signed => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+                        },
+                    }
+                },
+                .embedded_in_code, .memory => {
+                    return self.fail(src, "TODO implement x86 multiply destination memory", .{});
+                },
+            }
+        }
+
         fn genX8664ModRMRegToStack(self: *Self, src: LazySrcLoc, ty: Type, off: u32, reg: Register, opcode: u8) !void {
             const abi_size = ty.abiSize(self.target.*);
             const adj_off = off + abi_size;
-            try self.code.ensureCapacity(self.code.items.len + 7);
-            self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() });
-            const reg_id: u8 = @truncate(u3, reg.id());
-            if (adj_off <= 128) {
-                // example: 48 89 55 7f           mov    QWORD PTR [rbp+0x7f],rdx
-                const RM = @as(u8, 0b01_000_101) | (reg_id << 3);
-                const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
-                const twos_comp = @bitCast(u8, negative_offset);
-                self.code.appendSliceAssumeCapacity(&[_]u8{ opcode, RM, twos_comp });
-            } else if (adj_off <= 2147483648) {
-                // example: 48 89 95 80 00 00 00  mov    QWORD PTR [rbp+0x80],rdx
-                const RM = @as(u8, 0b10_000_101) | (reg_id << 3);
-                const negative_offset = @intCast(i32, -@intCast(i33, adj_off));
-                const twos_comp = @bitCast(u32, negative_offset);
-                self.code.appendSliceAssumeCapacity(&[_]u8{ opcode, RM });
-                mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp);
-            } else {
-                return self.fail(src, "stack offset too large", .{});
-            }
+            if (off > math.maxInt(i32)) {
+                return self.fail(src, "stack offset too large", .{});
+            }
+
+            const i_adj_off = -@intCast(i32, adj_off);
+            const encoder = try X8664Encoder.init(self.code, 7);
+            encoder.rex(.{
+                .w = abi_size == 8,
+                .r = reg.isExtended(),
+            });
+            encoder.opcode_1byte(opcode);
+            if (i_adj_off < std.math.maxInt(i8)) {
+                // example: 48 89 55 7f           mov    QWORD PTR [rbp+0x7f],rdx
+                encoder.modRm_indirectDisp8(
+                    reg.low_id(),
+                    Register.ebp.low_id(),
+                );
+                encoder.disp8(@intCast(i8, i_adj_off));
+            } else {
+                // example: 48 89 95 80 00 00 00  mov    QWORD PTR [rbp+0x80],rdx
+                encoder.modRm_indirectDisp32(
+                    reg.low_id(),
+                    Register.ebp.low_id(),
+                );
+                encoder.disp32(i_adj_off);
+            }
         }
 
         fn genArgDbgInfo(self: *Self, inst: *ir.Inst.Arg, mcv: MCValue) !void {
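A standalone sketch (not part of the commit) of the imul encodings the comments above cite; the literal byte strings can be checked with a disassembler:

    const std = @import("std");

    test "imul byte patterns" {
        // imul rax, rcx -> REX.W 0F AF /r, ModR/M mod=11 reg=rax(0) rm=rcx(1)
        std.debug.assert(std.mem.eql(u8, &[_]u8{ 0x48, 0x0F, 0xAF, (0b11 << 6) | (0 << 3) | 1 }, &[_]u8{ 0x48, 0x0F, 0xAF, 0xC1 }));
        // imul rax, rax, 8 -> REX.W 6B /r ib, reg and rm both rax
        std.debug.assert(std.mem.eql(u8, &[_]u8{ 0x48, 0x6B, (0b11 << 6) | 0, 0x08 }, &[_]u8{ 0x48, 0x6B, 0xC0, 0x08 }));
    }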
@@ -2126,12 +2442,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             log.debug("got_addr = 0x{x}", .{got_addr});
             switch (arch) {
                 .x86_64 => {
-                    try self.genSetReg(inst.base.src, Type.initTag(.u32), .rax, .{ .memory = got_addr });
+                    try self.genSetReg(inst.base.src, Type.initTag(.u64), .rax, .{ .memory = got_addr });
                     // callq *%rax
                     try self.code.ensureCapacity(self.code.items.len + 2);
                     self.code.appendSliceAssumeCapacity(&[2]u8{ 0xff, 0xd0 });
                 },
                 .aarch64 => {
-                    try self.genSetReg(inst.base.src, Type.initTag(.u32), .x30, .{ .memory = got_addr });
+                    try self.genSetReg(inst.base.src, Type.initTag(.u64), .x30, .{ .memory = got_addr });
                     // blr x30
                     writeInt(u32, try self.code.addManyAsArray(4), Instruction.blr(.x30).toU32());
                 },
@@ -2355,15 +2672,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 .register => |reg| blk: {
                     // test reg, 1
                     // TODO detect al, ax, eax
-                    try self.code.ensureCapacity(self.code.items.len + 4);
-                    // TODO audit this codegen: we force w = true here to make
-                    // the value affect the big register
-                    self.rex(.{ .b = reg.isExtended(), .w = true });
-                    self.code.appendSliceAssumeCapacity(&[_]u8{
-                        0xf6,
-                        @as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()),
-                        0x01,
-                    });
+                    const encoder = try X8664Encoder.init(self.code, 4);
+                    encoder.rex(.{
+                        // TODO audit this codegen: we force w = true here to make
+                        // the value affect the big register
+                        .w = true,
+                        .b = reg.isExtended(),
+                    });
+                    encoder.opcode_1byte(0xf6);
+                    encoder.modRm_direct(
+                        0,
+                        reg.low_id(),
+                    );
+                    encoder.disp8(1);
                     break :blk 0x84;
                 },
                 else => return self.fail(inst.base.src, "TODO implement condbr {s} when condition is {s}", .{ self.target.cpu.arch, @tagName(cond) }),
@@ -2673,9 +2994,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             switch (arch) {
                 .x86_64 => switch (inst.base.tag) {
                     // lhs AND rhs
-                    .bool_and => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs, 4, 0x20),
+                    .bool_and => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
                     // lhs OR rhs
-                    .bool_or => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs, 1, 0x08),
+                    .bool_or => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
                     else => unreachable, // Not a boolean operation
                 },
                 .arm, .armeb => switch (inst.base.tag) {
@@ -2882,39 +3203,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             }
         }
 
-        /// Encodes a REX prefix as specified, and appends it to the instruction
-        /// stream. This only modifies the instruction stream if at least one bit
-        /// is set true, which has a few implications:
-        ///
-        /// * The length of the instruction buffer will be modified *if* the
-        ///   resulting REX is meaningful, but will remain the same if it is not.
-        /// * Deliberately inserting a "meaningless REX" requires explicit usage of
-        ///   0x40, and cannot be done via this function.
-        /// W => 64 bit mode
-        /// R => extension to the MODRM.reg field
-        /// X => extension to the SIB.index field
-        /// B => extension to the MODRM.rm field or the SIB.base field
-        fn rex(self: *Self, arg: struct { b: bool = false, w: bool = false, x: bool = false, r: bool = false }) void {
-            comptime assert(arch == .x86_64);
-            // From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB.
-            var value: u8 = 0x40;
-            if (arg.b) {
-                value |= 0x1;
-            }
-            if (arg.x) {
-                value |= 0x2;
-            }
-            if (arg.r) {
-                value |= 0x4;
-            }
-            if (arg.w) {
-                value |= 0x8;
-            }
-            if (value != 0x40) {
-                self.code.appendAssumeCapacity(value);
-            }
-        }
-
         /// Sets the value without any modifications to register allocation metadata or stack allocation metadata.
         fn setRegOrMem(self: *Self, src: LazySrcLoc, ty: Type, loc: MCValue, val: MCValue) !void {
             switch (loc) {
@@ -3462,20 +3750,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     }
                 },
                 .compare_flags_unsigned => |op| {
-                    try self.code.ensureCapacity(self.code.items.len + 3);
+                    const encoder = try X8664Encoder.init(self.code, 7);
                     // TODO audit this codegen: we force w = true here to make
                     // the value affect the big register
-                    self.rex(.{ .b = reg.isExtended(), .w = true });
-                    const opcode: u8 = switch (op) {
+                    encoder.rex(.{
+                        .w = true,
+                        .b = reg.isExtended(),
+                    });
+                    encoder.opcode_2byte(0x0f, switch (op) {
                         .gte => 0x93,
                         .gt => 0x97,
                         .neq => 0x95,
                         .lt => 0x92,
                         .lte => 0x96,
                         .eq => 0x94,
-                    };
-                    const id = @as(u8, reg.id() & 0b111);
-                    self.code.appendSliceAssumeCapacity(&[_]u8{ 0x0f, opcode, 0xC0 | id });
+                    });
+                    encoder.modRm_direct(
+                        0,
+                        reg.low_id(),
+                    );
                 },
                 .compare_flags_signed => |op| {
                     return self.fail(src, "TODO set register with compare flags value (signed)", .{});
@@ -3485,40 +3778,43 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     // register is the fastest way to zero a register.
                     if (x == 0) {
                         // The encoding for `xor r32, r32` is `0x31 /r`.
-                        // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the
-                        // ModR/M byte of the instruction contains a register operand and an r/m operand."
-                        //
-                        // R/M bytes are composed of two bits for the mode, then three bits for the register,
-                        // then three bits for the operand. Since we're zeroing a register, the two three-bit
-                        // values will be identical, and the mode is three (the raw register value).
-                        //
+                        const encoder = try X8664Encoder.init(self.code, 3);
 
                         // If we're accessing e.g. r8d, we need to use a REX prefix before the actual operation. Since
                         // this is a 32-bit operation, the W flag is set to zero. X is also zero, as we're not using a SIB.
                         // Both R and B are set, as we're extending, in effect, the register bits *and* the operand.
-                        try self.code.ensureCapacity(self.code.items.len + 3);
-                        self.rex(.{ .r = reg.isExtended(), .b = reg.isExtended() });
-                        const id = @as(u8, reg.id() & 0b111);
-                        self.code.appendSliceAssumeCapacity(&[_]u8{ 0x31, 0xC0 | id << 3 | id });
+                        encoder.rex(.{
+                            .r = reg.isExtended(),
+                            .b = reg.isExtended(),
+                        });
+                        encoder.opcode_1byte(0x31);
+                        // Section 3.1.1.1 of the Intel x64 Manual states that "/r indicates that the
+                        // ModR/M byte of the instruction contains a register operand and an r/m operand."
+                        encoder.modRm_direct(
+                            reg.low_id(),
+                            reg.low_id(),
+                        );
 
                         return;
                     }
-                    if (x <= math.maxInt(u32)) {
+                    if (x <= math.maxInt(i32)) {
                         // Next best case: if we set the lower four bytes, the upper four will be zeroed.
                         //
                        // The encoding for `mov IMM32 -> REG` is (0xB8 + R) IMM.
-                        if (reg.isExtended()) {
-                            // Just as with XORing, we need a REX prefix. This time though, we only
-                            // need the B bit set, as we're extending the opcode's register field,
-                            // and there is no Mod R/M byte.
-                            //
-                            // Thus, we need b01000001, or 0x41.
-                            try self.code.resize(self.code.items.len + 6);
-                            self.code.items[self.code.items.len - 6] = 0x41;
-                        } else {
-                            try self.code.resize(self.code.items.len + 5);
-                        }
-                        self.code.items[self.code.items.len - 5] = 0xB8 | @as(u8, reg.id() & 0b111);
-                        const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4];
-                        mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x));
+
+                        const encoder = try X8664Encoder.init(self.code, 6);
+                        // Just as with XORing, we need a REX prefix. This time though, we only
+                        // need the B bit set, as we're extending the opcode's register field,
+                        // and there is no Mod R/M byte.
+                        encoder.rex(.{
+                            .b = reg.isExtended(),
+                        });
+                        encoder.opcode_withReg(0xB8, reg.low_id());
+
+                        // no ModR/M byte
+
+                        // IMM
+                        encoder.imm32(@intCast(i32, x));
                         return;
                     }
                     // Worst case: we need to load the 64-bit register with the IMM. GNU's assembler calls
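For reference, the byte patterns produced by the two fast paths above (an aside, not part of the commit):

    // xor ecx, ecx  -> 31 C9           ModR/M 0xC9 = mod 11, reg ecx(1), rm ecx(1)
    // xor r8d, r8d  -> 45 31 C0        REX 0x45 = 0x40 | R | B; ModR/M 0xC0
    // mov eax, 0x10 -> B8 10 00 00 00  opcode 0xB8 + register id, no ModR/M byte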
@@ -3528,79 +3824,98 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     // This encoding is, in fact, the *same* as the one used for 32-bit loads. The only
                     // difference is that we set REX.W before the instruction, which extends the load to
                     // 64-bit and uses the full bit-width of the register.
-                    //
-                    // Since we always need a REX here, let's just check if we also need to set REX.B.
-                    //
-                    // In this case, the encoding of the REX byte is 0b0100100B
-                    try self.code.ensureCapacity(self.code.items.len + 10);
-                    self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
-                    self.code.items.len += 9;
-                    self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111);
-                    const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
-                    mem.writeIntLittle(u64, imm_ptr, x);
+                    {
+                        const encoder = try X8664Encoder.init(self.code, 10);
+                        encoder.rex(.{
+                            .w = true,
+                            .b = reg.isExtended(),
+                        });
+                        encoder.opcode_withReg(0xB8, reg.low_id());
+                        encoder.imm64(x);
+                    }
                 },
                 .embedded_in_code => |code_offset| {
                     // We need the offset from RIP in a signed i32 twos complement.
                     // The instruction is 7 bytes long and RIP points to the next instruction.
-                    try self.code.ensureCapacity(self.code.items.len + 7);
-                    // 64-bit LEA is encoded as REX.W 8D /r. If the register is extended, the REX byte is modified,
-                    // but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three
-                    // bits as five.
-                    // REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id.
-                    self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
-                    self.code.items.len += 6;
-                    const rip = self.code.items.len;
-                    const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
-                    const offset = @intCast(i32, big_offset);
-                    self.code.items[self.code.items.len - 6] = 0x8D;
-                    self.code.items[self.code.items.len - 5] = 0b101 | (@as(u8, reg.id() & 0b111) << 3);
-                    const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4];
-                    mem.writeIntLittle(i32, imm_ptr, offset);
+
+                    // 64-bit LEA is encoded as REX.W 8D /r.
+                    const rip = self.code.items.len + 7;
+                    const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
+                    const offset = @intCast(i32, big_offset);
+                    const encoder = try X8664Encoder.init(self.code, 7);
+
+                    // byte 1, always exists because w = true
+                    encoder.rex(.{
+                        .w = true,
+                        .r = reg.isExtended(),
+                    });
+                    // byte 2
+                    encoder.opcode_1byte(0x8D);
+                    // byte 3
+                    encoder.modRm_RIPDisp32(reg.low_id());
+                    // byte 4-7
+                    encoder.disp32(offset);
+
+                    // Double check that we haven't done any math errors
+                    assert(rip == self.code.items.len);
                 },
                 .register => |src_reg| {
                     // If the registers are the same, nothing to do.
                     if (src_reg.id() == reg.id())
                         return;
 
-                    // This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX.
-                    // This is thus three bytes: REX 0x8B R/M.
-                    // If the destination is extended, the R field must be 1.
-                    // If the *source* is extended, the B field must be 1.
-                    // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle
-                    // three bits) contain the destination, and the R/M field (the lower three bits) contain the source.
-                    try self.code.ensureCapacity(self.code.items.len + 3);
-                    self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended(), .b = src_reg.isExtended() });
-                    const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111);
-                    self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R });
+                    // This is a variant of 8B /r.
+                    const abi_size = ty.abiSize(self.target.*);
+                    const encoder = try X8664Encoder.init(self.code, 3);
+                    encoder.rex(.{
+                        .w = abi_size == 8,
+                        .r = reg.isExtended(),
+                        .b = src_reg.isExtended(),
+                    });
+                    encoder.opcode_1byte(0x8B);
+                    encoder.modRm_direct(reg.low_id(), src_reg.low_id());
                 },
                 .memory => |x| {
                     if (self.bin_file.options.pie) {
                         // RIP-relative displacement to the entry in the GOT table.
+                        const abi_size = ty.abiSize(self.target.*);
+                        const encoder = try X8664Encoder.init(self.code, 10);
+
                         // LEA reg, [<offset>]
+
+                        // We encode the instruction FIRST because prefixes may or may not appear.
+                        // After we encode the instruction, we will know that the displacement bytes
+                        // for [<offset>] will be at self.code.items.len - 4.
+                        encoder.rex(.{
+                            .w = true, // force 64 bit because loading an address (to the GOT)
+                            .r = reg.isExtended(),
+                        });
+                        encoder.opcode_1byte(0x8D);
+                        encoder.modRm_RIPDisp32(reg.low_id());
+                        encoder.disp32(0);
 
                         // TODO we should come up with our own, backend independent relocation types
                         // which each backend (Elf, MachO, etc.) would then translate into an actual
                         // fixup when linking.
                         if (self.bin_file.cast(link.File.MachO)) |macho_file| {
                             try macho_file.pie_fixups.append(self.bin_file.allocator, .{
                                 .target_addr = x,
-                                .offset = self.code.items.len + 3,
+                                .offset = self.code.items.len - 4,
                                 .size = 4,
                             });
                         } else {
                             return self.fail(src, "TODO implement genSetReg for PIE GOT indirection on this platform", .{});
                         }
-                        try self.code.ensureCapacity(self.code.items.len + 7);
-                        self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() });
-                        self.code.appendSliceAssumeCapacity(&[_]u8{
-                            0x8D,
-                            0x05 | (@as(u8, reg.id() & 0b111) << 3),
-                        });
-                        mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), 0);
-
-                        try self.code.ensureCapacity(self.code.items.len + 3);
-                        self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() });
-                        const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id());
-                        self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM });
-                    } else if (x <= math.maxInt(u32)) {
+
+                        // MOV reg, [reg]
+                        encoder.rex(.{
+                            .w = abi_size == 8,
+                            .r = reg.isExtended(),
+                            .b = reg.isExtended(),
+                        });
+                        encoder.opcode_1byte(0x8B);
+                        encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
+                    } else if (x <= math.maxInt(i32)) {
                         // Moving from memory to a register is a variant of `8B /r`.
                         // Since we're using 64-bit moves, we require a REX.
                         // This variant also requires a SIB, as it would otherwise be RIP-relative.
@@ -3608,14 +3923,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         // The SIB must be 0x25, to indicate a disp32 with no scaled index.
                         // 0b00RRR100, where RRR is the lower three bits of the register ID.
                         // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
-                        try self.code.ensureCapacity(self.code.items.len + 8);
-                        self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
-                        self.code.appendSliceAssumeCapacity(&[_]u8{
-                            0x8B,
-                            0x04 | (@as(u8, reg.id() & 0b111) << 3), // R
-                            0x25,
-                        });
-                        mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), @intCast(u32, x));
+                        const abi_size = ty.abiSize(self.target.*);
+                        const encoder = try X8664Encoder.init(self.code, 8);
+                        encoder.rex(.{
+                            .w = abi_size == 8,
+                            .r = reg.isExtended(),
+                        });
+                        encoder.opcode_1byte(0x8B);
+                        // effective address = [SIB]
+                        encoder.modRm_SIBDisp0(reg.low_id());
+                        // SIB = disp32
+                        encoder.sib_disp32();
+                        encoder.disp32(@intCast(i32, x));
                     } else {
                         // If this is RAX, we can use a direct load; otherwise, we need to load the address, then indirectly load
                        // the value.
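A worked byte-level example (an aside, not from the commit) of the SIB-based absolute load being emitted above:

    // mov rax, QWORD PTR [0x10]  ->  48 8B 04 25 10 00 00 00
    //   48           REX.W
    //   8B           MOV r64, r/m64
    //   04           ModR/M: mod=00, reg=rax(000), rm=100 (SIB follows)
    //   25           SIB: scale=00, index=100 (none), base=101 (disp32, since mod=00)
    //   10 00 00 00  little-endian disp32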
@@ -3623,12 +3942,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         // REX.W 0xA1 moffs64*
                         // moffs64* is a 64-bit offset "relative to segment base", which really just means the
                         // absolute address for all practical purposes.
-                        try self.code.resize(self.code.items.len + 10);
-                        // REX.W == 0x48
-                        self.code.items[self.code.items.len - 10] = 0x48;
-                        self.code.items[self.code.items.len - 9] = 0xA1;
-                        const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
-                        mem.writeIntLittle(u64, imm_ptr, x);
+
+                        const encoder = try X8664Encoder.init(self.code, 10);
+                        encoder.rex(.{
+                            .w = true,
+                        });
+                        encoder.opcode_1byte(0xA1);
+                        encoder.writeIntLittle(u64, x);
                     } else {
                         // This requires two instructions; a move imm as used above, followed by an indirect load using the register
                         // as the address and the register as the destination.
@@ -3645,41 +3965,42 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         // Now, the register contains the address of the value to load into it
                         // Currently, we're only allowing 64-bit registers, so we need the `REX.W 8B /r` variant.
                         // TODO: determine whether to allow other sized registers, and if so, handle them properly.
-                        // This operation requires three bytes: REX 0x8B R/M
-                        try self.code.ensureCapacity(self.code.items.len + 3);
-                        // For this operation, we want R/M mode *zero* (use register indirectly), and the two register
-                        // values must match. Thus, it's 00ABCABC where ABC is the lower three bits of the register ID.
-                        //
-                        // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both*
-                        // register operands need to be marked as extended.
-                        self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() });
-                        const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id());
-                        self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM });
+
+                        // mov reg, [reg]
+                        const abi_size = ty.abiSize(self.target.*);
+                        const encoder = try X8664Encoder.init(self.code, 3);
+                        encoder.rex(.{
+                            .w = abi_size == 8,
+                            .r = reg.isExtended(),
+                            .b = reg.isExtended(),
+                        });
+                        encoder.opcode_1byte(0x8B);
+                        encoder.modRm_indirectDisp0(reg.low_id(), reg.low_id());
                     }
                 }
             },
             .stack_offset => |unadjusted_off| {
-                try self.code.ensureCapacity(self.code.items.len + 7);
-                const size_bytes = @divExact(reg.size(), 8);
-                const off = unadjusted_off + size_bytes;
-                self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() });
-                const reg_id: u8 = @truncate(u3, reg.id());
-                if (off <= 128) {
-                    // Example: 48 8b 4d 7f           mov    rcx,QWORD PTR [rbp+0x7f]
-                    const RM = @as(u8, 0b01_000_101) | (reg_id << 3);
-                    const negative_offset = @intCast(i8, -@intCast(i32, off));
-                    const twos_comp = @bitCast(u8, negative_offset);
-                    self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8b, RM, twos_comp });
-                } else if (off <= 2147483648) {
-                    // Example: 48 8b 8d 80 00 00 00  mov    rcx,QWORD PTR [rbp+0x80]
-                    const RM = @as(u8, 0b10_000_101) | (reg_id << 3);
-                    const negative_offset = @intCast(i32, -@intCast(i33, off));
-                    const twos_comp = @bitCast(u32, negative_offset);
-                    self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8b, RM });
-                    mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp);
-                } else {
-                    return self.fail(src, "stack offset too large", .{});
-                }
+                const abi_size = ty.abiSize(self.target.*);
+                const off = unadjusted_off + abi_size;
+                if (off < std.math.minInt(i32) or off > std.math.maxInt(i32)) {
+                    return self.fail(src, "stack offset too large", .{});
+                }
+                const ioff = -@intCast(i32, off);
+                const encoder = try X8664Encoder.init(self.code, 3);
+                encoder.rex(.{
+                    .w = abi_size == 8,
+                    .r = reg.isExtended(),
+                });
+                encoder.opcode_1byte(0x8B);
+                if (std.math.minInt(i8) <= ioff and ioff <= std.math.maxInt(i8)) {
+                    // Example: 48 8b 4d 7f           mov    rcx,QWORD PTR [rbp+0x7f]
+                    encoder.modRm_indirectDisp8(reg.low_id(), Register.ebp.low_id());
+                    encoder.disp8(@intCast(i8, ioff));
+                } else {
+                    // Example: 48 8b 8d 80 00 00 00  mov    rcx,QWORD PTR [rbp+0x80]
+                    encoder.modRm_indirectDisp32(reg.low_id(), Register.ebp.low_id());
+                    encoder.disp32(ioff);
+                }
             },
             else => return self.fail(src, "TODO implement getSetReg for {}", .{self.target.cpu.arch}),
src/codegen/x86_64.zig

@@ -1,4 +1,9 @@
 const std = @import("std");
+const testing = std.testing;
+const mem = std.mem;
+const assert = std.debug.assert;
+const ArrayList = std.ArrayList;
+const Allocator = std.mem.Allocator;
 const Type = @import("../Type.zig");
 const DW = std.dwarf;
@@ -68,6 +73,11 @@ pub const Register = enum(u8) {
         return @truncate(u4, @enumToInt(self));
     }
 
+    /// Like id, but only returns the lower 3 bits.
+    pub fn low_id(self: Register) u3 {
+        return @truncate(u3, @enumToInt(self));
+    }
+
     /// Returns the index into `callee_preserved_regs`.
     pub fn allocIndex(self: Register) ?u4 {
         return switch (self) {
@@ -136,6 +146,493 @@ pub const callee_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8
 pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
 pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };
 
+/// Encoding helper functions for x86_64 instructions
+///
+/// Many of these helpers do very little, but they can help make things
+/// slightly more readable with more descriptive field names / function names.
+///
+/// Some of them also have asserts to ensure that we aren't doing dumb things.
+/// For example, trying to use register 4 (esp) in an indirect modr/m byte is illegal;
+/// you need to encode it with an SIB byte.
+///
+/// Note that ALL of these helper functions will assume capacity,
+/// so ensure that the `code` array has sufficient capacity before using them.
+/// The `init` method is the recommended way to ensure capacity.
+pub const Encoder = struct {
+    /// Non-owning reference to the code array
+    code: *ArrayList(u8),
+
+    const Self = @This();
+
+    /// Wrap `code` in an Encoder to make it easier to call these helper functions
+    ///
+    /// maximum_inst_size should contain the maximum number of bytes
+    /// that the encoded instruction will take.
+    /// This is because the helper functions will assume capacity
+    /// in order to avoid bounds checking.
+    pub fn init(code: *ArrayList(u8), maximum_inst_size: u8) !Self {
+        try code.ensureCapacity(code.items.len + maximum_inst_size);
+        return Self{ .code = code };
+    }
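(Illustrative aside, not part of the commit: a typical call sequence using the helpers below, encoding `mov rbp, rsp`, i.e. REX.W 89 /r -> 48 89 E5. It assumes a `code: *ArrayList(u8)` buffer in scope and a caller that can propagate errors; the Register enum follows the hardware numbering, so rsp.low_id() is 4 and rbp.low_id() is 5.)

    const encoder = try Encoder.init(code, 3);
    encoder.rex(.{ .w = true });
    encoder.opcode_1byte(0x89);
    encoder.modRm_direct(Register.rsp.low_id(), Register.rbp.low_id());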
+    /// Directly write a number to the code array with big endianness
+    pub fn writeIntBig(self: Self, comptime T: type, value: T) void {
+        mem.writeIntBig(
+            T,
+            self.code.addManyAsArrayAssumeCapacity(@divExact(@typeInfo(T).Int.bits, 8)),
+            value,
+        );
+    }
+
+    /// Directly write a number to the code array with little endianness
+    pub fn writeIntLittle(self: Self, comptime T: type, value: T) void {
+        mem.writeIntLittle(
+            T,
+            self.code.addManyAsArrayAssumeCapacity(@divExact(@typeInfo(T).Int.bits, 8)),
+            value,
+        );
+    }
+    // --------
+    // Prefixes
+    // --------
+
+    pub const LegacyPrefixes = packed struct {
+        /// LOCK
+        prefix_f0: bool = false,
+        /// REPNZ, REPNE, REP, Scalar Double-precision
+        prefix_f2: bool = false,
+        /// REPZ, REPE, REP, Scalar Single-precision
+        prefix_f3: bool = false,
+
+        /// CS segment override or Branch not taken
+        prefix_2e: bool = false,
+        /// DS segment override
+        prefix_36: bool = false,
+        /// ES segment override
+        prefix_26: bool = false,
+        /// FS segment override
+        prefix_64: bool = false,
+        /// GS segment override
+        prefix_65: bool = false,
+
+        /// Branch taken
+        prefix_3e: bool = false,
+
+        /// Operand size override (enables 16 bit operation)
+        prefix_66: bool = false,
+
+        /// Address size override (enables 16 bit address size)
+        prefix_67: bool = false,
+
+        padding: u5 = 0,
+    };
+
+    /// Encodes legacy prefixes
+    pub fn legacyPrefixes(self: Self, prefixes: LegacyPrefixes) void {
+        if (@bitCast(u16, prefixes) != 0) {
+            // Hopefully this path isn't taken very often, so we'll do it the slow way for now
+
+            // LOCK
+            if (prefixes.prefix_f0) self.code.appendAssumeCapacity(0xf0);
+            // REPNZ, REPNE, REP, Scalar Double-precision
+            if (prefixes.prefix_f2) self.code.appendAssumeCapacity(0xf2);
+            // REPZ, REPE, REP, Scalar Single-precision
+            if (prefixes.prefix_f3) self.code.appendAssumeCapacity(0xf3);
+
+            // CS segment override or Branch not taken
+            if (prefixes.prefix_2e) self.code.appendAssumeCapacity(0x2e);
+            // DS segment override
+            if (prefixes.prefix_36) self.code.appendAssumeCapacity(0x36);
+            // ES segment override
+            if (prefixes.prefix_26) self.code.appendAssumeCapacity(0x26);
+            // FS segment override
+            if (prefixes.prefix_64) self.code.appendAssumeCapacity(0x64);
+            // GS segment override
+            if (prefixes.prefix_65) self.code.appendAssumeCapacity(0x65);
+
+            // Branch taken
+            if (prefixes.prefix_3e) self.code.appendAssumeCapacity(0x3e);
+
+            // Operand size override
+            if (prefixes.prefix_66) self.code.appendAssumeCapacity(0x66);
+
+            // Address size override
+            if (prefixes.prefix_67) self.code.appendAssumeCapacity(0x67);
+        }
+    }
+
+    /// Use 16 bit operand size
+    ///
+    /// Note that this flag is overridden by REX.W, if both are present.
+    pub fn prefix16BitMode(self: Self) void {
+        self.code.appendAssumeCapacity(0x66);
+    }
+    /// From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB
+    pub const Rex = struct {
+        /// Wide, enables 64-bit operation
+        w: bool = false,
+        /// Extends the reg field in the ModR/M byte
+        r: bool = false,
+        /// Extends the index field in the SIB byte
+        x: bool = false,
+        /// Extends the r/m field in the ModR/M byte,
+        /// or the base field in the SIB byte,
+        /// or the reg field in the Opcode byte
+        b: bool = false,
+    };
+
+    /// Encodes a REX prefix byte given all the fields
+    ///
+    /// Use this byte whenever you need 64 bit operation,
+    /// or one of reg, index, r/m, base, or opcode-reg might be extended.
+    ///
+    /// See struct `Rex` for a description of each field.
+    ///
+    /// Does not add a prefix byte if none of the fields are set!
+    pub fn rex(self: Self, byte: Rex) void {
+        var value: u8 = 0b0100_0000;
+
+        if (byte.w) value |= 0b1000;
+        if (byte.r) value |= 0b0100;
+        if (byte.x) value |= 0b0010;
+        if (byte.b) value |= 0b0001;
+
+        if (value != 0b0100_0000) {
+            self.code.appendAssumeCapacity(value);
+        }
+    }
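(Illustrative aside, not part of the commit: the bit math above maps field combinations straight to the familiar prefix bytes.)

    // .{ .w = true }            -> 0b0100_1000 = 0x48  (classic REX.W)
    // .{ .w = true, .b = true } -> 0b0100_1001 = 0x49  (64-bit op touching r8..r15 via r/m)
    // .{ .r = true, .b = true } -> 0b0100_0101 = 0x45  (e.g. xor r8d, r8d)
    // .{}                       -> nothing emitted at all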
+    // ------
+    // Opcode
+    // ------
+
+    /// Encodes a 1 byte opcode
+    pub fn opcode_1byte(self: Self, opcode: u8) void {
+        self.code.appendAssumeCapacity(opcode);
+    }
+
+    /// Encodes a 2 byte opcode
+    ///
+    /// e.g. IMUL has the opcode 0x0f 0xaf, so you use
+    ///
+    ///     encoder.opcode_2byte(0x0f, 0xaf);
+    pub fn opcode_2byte(self: Self, prefix: u8, opcode: u8) void {
+        self.code.appendAssumeCapacity(prefix);
+        self.code.appendAssumeCapacity(opcode);
+    }
+
+    /// Encodes a 1 byte opcode with a reg field
+    ///
+    /// Remember to add a REX prefix byte if reg is extended!
+    pub fn opcode_withReg(self: Self, opcode: u8, reg: u3) void {
+        assert(opcode & 0b111 == 0);
+        self.code.appendAssumeCapacity(opcode | reg);
+    }
+    // ------
+    // ModR/M
+    // ------
+
+    /// Construct a ModR/M byte given all the fields
+    ///
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm(self: Self, mod: u2, reg_or_opx: u3, rm: u3) void {
+        self.code.appendAssumeCapacity(
+            @as(u8, mod) << 6 | @as(u8, reg_or_opx) << 3 | rm,
+        );
+    }
+
+    /// Construct a ModR/M byte using direct r/m addressing
+    /// r/m effective address: r/m
+    ///
+    /// Note reg's effective address is always just reg for the ModR/M byte.
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm_direct(self: Self, reg_or_opx: u3, rm: u3) void {
+        self.modRm(0b11, reg_or_opx, rm);
+    }
+
+    /// Construct a ModR/M byte using indirect r/m addressing
+    /// r/m effective address: [r/m]
+    ///
+    /// Note reg's effective address is always just reg for the ModR/M byte.
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm_indirectDisp0(self: Self, reg_or_opx: u3, rm: u3) void {
+        assert(rm != 4 and rm != 5);
+        self.modRm(0b00, reg_or_opx, rm);
+    }
+
+    /// Construct a ModR/M byte using indirect SIB addressing
+    /// r/m effective address: [SIB]
+    ///
+    /// Note reg's effective address is always just reg for the ModR/M byte.
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm_SIBDisp0(self: Self, reg_or_opx: u3) void {
+        self.modRm(0b00, reg_or_opx, 0b100);
+    }
+
+    /// Construct a ModR/M byte using RIP-relative addressing
+    /// r/m effective address: [RIP + disp32]
+    ///
+    /// Note reg's effective address is always just reg for the ModR/M byte.
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm_RIPDisp32(self: Self, reg_or_opx: u3) void {
+        self.modRm(0b00, reg_or_opx, 0b101);
+    }
+
+    /// Construct a ModR/M byte using indirect r/m with an 8 bit displacement
+    /// r/m effective address: [r/m + disp8]
+    ///
+    /// Note reg's effective address is always just reg for the ModR/M byte.
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm_indirectDisp8(self: Self, reg_or_opx: u3, rm: u3) void {
+        assert(rm != 4);
+        self.modRm(0b01, reg_or_opx, rm);
+    }
+
+    /// Construct a ModR/M byte using indirect SIB with an 8 bit displacement
+    /// r/m effective address: [SIB + disp8]
+    ///
+    /// Note reg's effective address is always just reg for the ModR/M byte.
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm_SIBDisp8(self: Self, reg_or_opx: u3) void {
+        self.modRm(0b01, reg_or_opx, 0b100);
+    }
+
+    /// Construct a ModR/M byte using indirect r/m with a 32 bit displacement
+    /// r/m effective address: [r/m + disp32]
+    ///
+    /// Note reg's effective address is always just reg for the ModR/M byte.
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm_indirectDisp32(self: Self, reg_or_opx: u3, rm: u3) void {
+        assert(rm != 4);
+        self.modRm(0b10, reg_or_opx, rm);
+    }
+
+    /// Construct a ModR/M byte using indirect SIB with a 32 bit displacement
+    /// r/m effective address: [SIB + disp32]
+    ///
+    /// Note reg's effective address is always just reg for the ModR/M byte.
+    /// Remember to add a REX prefix byte if reg or rm are extended!
+    pub fn modRm_SIBDisp32(self: Self, reg_or_opx: u3) void {
+        self.modRm(0b10, reg_or_opx, 0b100);
+    }
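(Illustrative aside, not part of the commit: the three ModR/M fields pack as mod << 6 | reg << 3 | rm, so each helper above only pins down `mod`, and sometimes `rm`.)

    // modRm_direct(0, 1)        -> 0b11_000_001 = 0xC1  (e.g. the /r byte of add rcx, rax)
    // modRm_RIPDisp32(2)        -> 0b00_010_101 = 0x15  ([RIP + disp32], reg = rdx)
    // modRm_indirectDisp8(0, 5) -> 0b01_000_101 = 0x45  ([rbp + disp8], reg = rax)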
// ---
|
||||
// SIB
|
||||
// ---
|
||||
|
||||
/// Construct a SIB byte given all the fields
|
||||
///
|
||||
/// Remember to add a REX prefix byte if index or base are extended!
|
||||
pub fn sib(self: Self, scale: u2, index: u3, base: u3) void {
|
||||
self.code.appendAssumeCapacity(
|
||||
@as(u8, scale) << 6 | @as(u8, index) << 3 | base,
|
||||
);
|
||||
}
|
||||
|
||||
/// Construct a SIB byte with scale * index + base, no frills.
|
||||
/// r/m effective address: [base + scale * index]
|
||||
///
|
||||
/// Remember to add a REX prefix byte if index or base are extended!
|
||||
pub fn sib_scaleIndexBase(self: Self, scale: u2, index: u3, base: u3) void {
|
||||
assert(base != 5);
|
||||
|
||||
self.sib(scale, index, base);
|
||||
}
|
||||
|
||||
/// Construct a SIB byte with scale * index + disp32
|
||||
/// r/m effective address: [scale * index + disp32]
|
||||
///
|
||||
/// Remember to add a REX prefix byte if index or base are extended!
|
||||
pub fn sib_scaleIndexDisp32(self: Self, scale: u2, index: u3) void {
|
||||
assert(index != 4);
|
||||
|
||||
// scale is actually ignored
|
||||
// index = 4 means no index
|
||||
// base = 5 means no base, if mod == 0.
|
||||
self.sib(scale, index, 5);
|
||||
}

/// Construct a SIB byte with just base
/// r/m effective address: [base]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_base(self: Self, base: u3) void {
assert(base != 5);

// scale is actually ignored
// index = 4 means no index
self.sib(0, 4, base);
}

/// Construct a SIB byte with just disp32
/// r/m effective address: [disp32]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_disp32(self: Self) void {
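// The emitted byte is always 0x25 (0b00_100_101); combined with mod == 00
// this selects the absolute form [disp32].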
// scale is actually ignored
// index = 4 means no index
// base = 5 means no base, if mod == 0.
self.sib(0, 4, 5);
}

/// Construct a SIB byte with scale * index + base + disp8
/// r/m effective address: [base + scale * index + disp8]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_scaleIndexBaseDisp8(self: Self, scale: u2, index: u3, base: u3) void {
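// No assert on base here: with mod != 00, base = 0b101 denotes rbp/r13 plus
// the following displacement rather than "no base".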
self.sib(scale, index, base);
}

/// Construct a SIB byte with base + disp8, no index
/// r/m effective address: [base + disp8]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_baseDisp8(self: Self, base: u3) void {
// scale is ignored
// index = 4 means no index
self.sib(0, 4, base);
}

/// Construct a SIB byte with scale * index + base + disp32
/// r/m effective address: [base + scale * index + disp32]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_scaleIndexBaseDisp32(self: Self, scale: u2, index: u3, base: u3) void {
self.sib(scale, index, base);
}

/// Construct a SIB byte with base + disp32, no index
/// r/m effective address: [base + disp32]
///
/// Remember to add a REX prefix byte if index or base are extended!
pub fn sib_baseDisp32(self: Self, base: u3) void {
// scale is ignored
// index = 4 means no index
self.sib(0, 4, base);
}

// -------------------------
// Trivial (no bit fiddling)
// -------------------------

/// Encode an 8-bit immediate
///
/// It is sign-extended to 64 bits by the CPU.
pub fn imm8(self: Self, imm: i8) void {
self.code.appendAssumeCapacity(@bitCast(u8, imm));
}

/// Encode an 8-bit displacement
///
/// It is sign-extended to 64 bits by the CPU.
pub fn disp8(self: Self, disp: i8) void {
self.code.appendAssumeCapacity(@bitCast(u8, disp));
}

/// Encode a 16-bit immediate
///
/// It is sign-extended to 64 bits by the CPU.
pub fn imm16(self: Self, imm: i16) void {
self.writeIntLittle(i16, imm);
}

/// Encode a 32-bit immediate
///
/// It is sign-extended to 64 bits by the CPU.
pub fn imm32(self: Self, imm: i32) void {
self.writeIntLittle(i32, imm);
}

/// Encode a 32-bit displacement
///
/// It is sign-extended to 64 bits by the CPU.
pub fn disp32(self: Self, disp: i32) void {
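// e.g. disp32(-4) appends fc ff ff ff (two's complement, little-endian).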
self.writeIntLittle(i32, disp);
}

/// Encode a 64-bit immediate
///
/// It is stored as-is, little-endian; a 64-bit immediate needs no sign extension.
pub fn imm64(self: Self, imm: u64) void {
self.writeIntLittle(u64, imm);
}
};

test "x86_64 Encoder helpers" {
var code = ArrayList(u8).init(testing.allocator);
defer code.deinit();

// simple integer multiplication

// imul eax,edi
// 0f af c7
{
try code.resize(0);
const encoder = try Encoder.init(&code, 4);
encoder.rex(.{
.r = Register.eax.isExtended(),
.b = Register.edi.isExtended(),
});
encoder.opcode_2byte(0x0f, 0xaf);
encoder.modRm_direct(
Register.eax.low_id(),
Register.edi.low_id(),
);

testing.expectEqualSlices(u8, &[_]u8{ 0x0f, 0xaf, 0xc7 }, code.items);
}
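// Note that the expected slice contains no REX byte: neither eax nor edi is
// an extended register, so rex(.{ ... }) emits nothing here.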
}

// simple mov

// mov eax,edi
// 89 f8
{
try code.resize(0);
const encoder = try Encoder.init(&code, 3);
encoder.rex(.{
.r = Register.edi.isExtended(),
.b = Register.eax.isExtended(),
});
encoder.opcode_1byte(0x89);
encoder.modRm_direct(
Register.edi.low_id(),
Register.eax.low_id(),
);

testing.expectEqualSlices(u8, &[_]u8{ 0x89, 0xf8 }, code.items);
}

// signed integer addition of a 32-bit sign-extended immediate to a 64-bit register

// add rcx, 2147483647
//
// Using the opcode REX.W + 81 /0 id, we expect the following encoding:
//
// 48       : REX.W set for 64-bit operand (*r*cx)
// 81       : opcode for "<arithmetic> with immediate"
// c1       : ModR/M byte,
//          : c1 = 11 <-- mod = 11 indicates r/m is a register (rcx)
//          :      000 <-- opcode_extension = 0 because the opcode extension is /0; /0 specifies ADD
//          :      001 <-- 001 is rcx
// ffffff7f : 2147483647, little-endian
{
try code.resize(0);
const encoder = try Encoder.init(&code, 7);
encoder.rex(.{ .w = true }); // use 64 bit operation
encoder.opcode_1byte(0x81);
encoder.modRm_direct(
0,
Register.rcx.low_id(),
);
encoder.imm32(2147483647);

testing.expectEqualSlices(u8, &[_]u8{ 0x48, 0x81, 0xc1, 0xff, 0xff, 0xff, 0x7f }, code.items);
}
}

// TODO add these registers to the enum and populate dwarfLocOp
// // Return Address register. This is stored in `0(%rsp, "")` and is not a physical register.
// RA = (16, "RA"),

@ -318,6 +318,81 @@ pub fn addCases(ctx: *TestContext) !void {
, &[_][]const u8{":2:15: error: incompatible types: 'bool' and 'comptime_int'"});
}

{
var case = ctx.exe("multiplying numbers at runtime and comptime", linux_x64);
case.addCompareOutput(
\\export fn _start() noreturn {
\\    mul(3, 4);
\\
\\    exit();
\\}
\\
\\fn mul(a: u32, b: u32) void {
\\    if (a * b != 12) unreachable;
\\}
\\
\\fn exit() noreturn {
\\    asm volatile ("syscall"
\\        :
\\        : [number] "{rax}" (231),
\\          [arg1] "{rdi}" (0)
\\        : "rcx", "r11", "memory"
\\    );
\\    unreachable;
\\}
,
"",
);
// comptime function call
case.addCompareOutput(
\\export fn _start() noreturn {
\\    exit();
\\}
\\
\\fn mul(a: u32, b: u32) u32 {
\\    return a * b;
\\}
\\
\\const x = mul(3, 4);
\\
\\fn exit() noreturn {
\\    asm volatile ("syscall"
\\        :
\\        : [number] "{rax}" (231),
\\          [arg1] "{rdi}" (x - 12)
\\        : "rcx", "r11", "memory"
\\    );
\\    unreachable;
\\}
,
"",
);
// Inline function call
case.addCompareOutput(
\\export fn _start() noreturn {
\\    var x: usize = 5;
\\    const y = mul(2, 3, x);
\\    exit(y - 30);
\\}
\\
\\fn mul(a: usize, b: usize, c: usize) callconv(.Inline) usize {
\\    return a * b * c;
\\}
\\
\\fn exit(code: usize) noreturn {
\\    asm volatile ("syscall"
\\        :
\\        : [number] "{rax}" (231),
\\          [arg1] "{rdi}" (code)
\\        : "rcx", "r11", "memory"
\\    );
\\    unreachable;
\\}
,
"",
);
}

{
var case = ctx.exe("assert function", linux_x64);
case.addCompareOutput(
@ -700,7 +775,8 @@ pub fn addCases(ctx: *TestContext) !void {
// Spilling registers to the stack.
case.addCompareOutput(
\\export fn _start() noreturn {
\\    assert(add(3, 4) == 791);
\\    assert(add(3, 4) == 1221);
\\    assert(mul(3, 4) == 21609);
\\
\\    exit();
\\}
@ -716,19 +792,47 @@ pub fn addCases(ctx: *TestContext) !void {
\\        const i = g + h; // 100
\\        const j = i + d; // 110
\\        const k = i + j; // 210
\\        const l = k + c; // 217
\\        const m = l + d; // 227
\\        const n = m + e; // 241
\\        const o = n + f; // 265
\\        const p = o + g; // 303
\\        const q = p + h; // 365
\\        const r = q + i; // 465
\\        const s = r + j; // 575
\\        const t = s + k; // 785
\\        break :blk t;
\\        const l = j + k; // 320
\\        const m = l + c; // 327
\\        const n = m + d; // 337
\\        const o = n + e; // 351
\\        const p = o + f; // 375
\\        const q = p + g; // 413
\\        const r = q + h; // 475
\\        const s = r + i; // 575
\\        const t = s + j; // 685
\\        const u = t + k; // 895
\\        const v = u + l; // 1215
\\        break :blk v;
\\    };
\\    const y = x + a; // 788
\\    const z = y + a; // 791
\\    const y = x + a; // 1218
\\    const z = y + a; // 1221
\\    return z;
\\}
\\
\\fn mul(a: u32, b: u32) u32 {
\\    const x: u32 = blk: {
\\        const c = a * a * a * a; // 81
\\        const d = a * a * a * b; // 108
\\        const e = a * a * b * a; // 108
\\        const f = a * a * b * b; // 144
\\        const g = a * b * a * a; // 108
\\        const h = a * b * a * b; // 144
\\        const i = a * b * b * a; // 144
\\        const j = a * b * b * b; // 192
\\        const k = b * a * a * a; // 108
\\        const l = b * a * a * b; // 144
\\        const m = b * a * b * a; // 144
\\        const n = b * a * b * b; // 192
\\        const o = b * b * a * a; // 144
\\        const p = b * b * a * b; // 192
\\        const q = b * b * b * a; // 192
\\        const r = b * b * b * b; // 256
\\        const s = c + d + e + f + g + h + i + j + k + l + m + n + o + p + q + r; // 2401
\\        break :blk s;
\\    };
\\    const y = x * a; // 7203
\\    const z = y * a; // 21609
\\    return z;
\\}
\\