stage2 x86_64: implement integer mul

This was also an experiment to see whether it would be easier to
implement a new feature using the instruction encoder.

Verdict: It's not that much easier, but I think it's certainly much more
readable, because the Instruction description annotates what each field
means. Right now, precise knowledge of x86_64 instructions is still
required: deciding when to set the 64-bit flag, reading the x86_64
instruction references, and so on are still not done for you
automatically.

In the future, this interface might make it slightly easier to write an
assembler for x86_64 by abstracting the bit-fiddling aspects of
instruction encoding.
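
For illustration only, here is a rough sketch of what an encoder call
looks like, using the field names that appear in the diff below; dst_reg
and src_reg stand in for concrete Register values, and this is not code
taken verbatim from the commit itself:

// Sketch: emitting IMUL r64, r/m64 (0F AF /r) through the Instruction
// struct, mirroring how genX8664Imul below uses it.
try self.encodeX8664Instruction(src, Instruction{
    // Set REX.W so the multiply operates on 64-bit registers.
    .operand_size_64 = true,
    // Two-byte opcode 0F AF: IMUL r32/64, r/m32/64.
    .primary_opcode_2b = 0xaf,
    // ModRM.rm encodes the source register...
    .modrm = Instruction.ModrmEffectiveAddress{ .reg = src_reg },
    // ...and ModRM.reg encodes the destination register.
    .reg = dst_reg,
});
// With dst_reg = rax and src_reg = rcx this corresponds to the bytes
// 48 0F AF C1, which would otherwise have to be assembled by hand.

(The actual diff additionally wraps .modrm in an explicit
@as(?Instruction.ModrmEffectiveAddress, ...) to work around a stage1
miscompilation, see ziglang/zig#6515.)
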
gracefu 2021-04-09 13:51:00 +08:00
parent 5bd464e386
commit c4b83ea021
4 changed files with 318 additions and 0 deletions

@@ -4330,6 +4330,33 @@ pub fn intSub(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
}
}
pub fn intMul(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
// TODO is this a performance issue? maybe we should try the operation without
// resorting to BigInt first.
var lhs_space: Value.BigIntSpace = undefined;
var rhs_space: Value.BigIntSpace = undefined;
const lhs_bigint = lhs.toBigInt(&lhs_space);
const rhs_bigint = rhs.toBigInt(&rhs_space);
const limbs = try allocator.alloc(
std.math.big.Limb,
lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1,
);
var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
var limbs_buffer = try allocator.alloc(
std.math.big.Limb,
std.math.big.int.calcMulLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
);
defer allocator.free(limbs_buffer);
result_bigint.mul(lhs_bigint, rhs_bigint, limbs_buffer, allocator);
const result_limbs = result_bigint.limbs[0..result_bigint.len];
if (result_bigint.positive) {
return Value.Tag.int_big_positive.create(allocator, result_limbs);
} else {
return Value.Tag.int_big_negative.create(allocator, result_limbs);
}
}
pub fn floatAdd(
arena: *Allocator,
float_type: Type,
@@ -4396,6 +4423,39 @@ pub fn floatSub(
}
}
pub fn floatMul(
arena: *Allocator,
float_type: Type,
src: LazySrcLoc,
lhs: Value,
rhs: Value,
) !Value {
switch (float_type.tag()) {
.f16 => {
@panic("TODO add __trunctfhf2 to compiler-rt");
//const lhs_val = lhs.toFloat(f16);
//const rhs_val = rhs.toFloat(f16);
//return Value.Tag.float_16.create(arena, lhs_val * rhs_val);
},
.f32 => {
const lhs_val = lhs.toFloat(f32);
const rhs_val = rhs.toFloat(f32);
return Value.Tag.float_32.create(arena, lhs_val * rhs_val);
},
.f64 => {
const lhs_val = lhs.toFloat(f64);
const rhs_val = rhs.toFloat(f64);
return Value.Tag.float_64.create(arena, lhs_val * rhs_val);
},
.f128, .comptime_float, .c_longdouble => {
const lhs_val = lhs.toFloat(f128);
const rhs_val = rhs.toFloat(f128);
return Value.Tag.float_128.create(arena, lhs_val * rhs_val);
},
else => unreachable,
}
}
pub fn simplePtrType(
mod: *Module,
arena: *Allocator,

@@ -3885,6 +3885,13 @@ fn analyzeArithmetic(
try Module.floatSub(sema.arena, scalar_type, src, lhs_val, rhs_val);
break :blk val;
},
.mul => blk: {
const val = if (is_int)
try Module.intMul(sema.arena, lhs_val, rhs_val)
else
try Module.floatMul(sema.arena, scalar_type, src, lhs_val, rhs_val);
break :blk val;
},
else => return sema.mod.fail(&block.base, src, "TODO Implement arithmetic operand '{s}'", .{@tagName(zir_tag)}),
};

@@ -1079,6 +1079,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (inst.base.isUnused())
return MCValue.dead;
switch (arch) {
.x86_64 => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
.arm, .armeb => return try self.genArmMul(&inst.base, inst.lhs, inst.rhs),
else => return self.fail(inst.base.src, "TODO implement mul for {}", .{self.target.cpu.arch}),
}
@@ -1574,6 +1575,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.sub, .subwrap => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 5, 0x28),
.xor, .not => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 6, 0x30),
.mul, .mulwrap => try self.genX8664Imul(inst.src, inst.ty, dst_mcv, src_mcv),
else => unreachable,
}
@@ -1795,6 +1797,153 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
}
/// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
fn genX8664Imul(
self: *Self,
src: LazySrcLoc,
dst_ty: Type,
dst_mcv: MCValue,
src_mcv: MCValue,
) !void {
switch (dst_mcv) {
.none => unreachable,
.undef => unreachable,
.dead, .unreach, .immediate => unreachable,
.compare_flags_unsigned => unreachable,
.compare_flags_signed => unreachable,
.ptr_stack_offset => unreachable,
.ptr_embedded_in_code => unreachable,
.register => |dst_reg| {
switch (src_mcv) {
.none => unreachable,
.undef => try self.genSetReg(src, dst_ty, dst_reg, .undef),
.dead, .unreach => unreachable,
.ptr_stack_offset => unreachable,
.ptr_embedded_in_code => unreachable,
.register => |src_reg| {
// register, register
//
// Use the following imul opcode
// 0F AF /r: IMUL r32/64, r/m32/64
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_2b = 0xaf,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = src_reg },
),
.reg = dst_reg,
});
},
.immediate => |imm| {
// register, immediate:
// depends on size of immediate.
//
// immediate fits in i8:
// 6B /r ib: IMUL r32/64, r/m32/64, imm8
//
// immediate fits in i32:
// 69 /r id: IMUL r32/64, r/m32/64, imm32
//
// immediate is huge:
// split into 2 instructions
// 1) copy the 64 bit immediate into a tmp register
// 2) perform register,register mul
// 0F AF /r: IMUL r32/64, r/m32/64
if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x6B,
.reg = dst_reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
),
.immediate_bytes = 1,
.immediate = imm,
});
} else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_1b = 0x69,
.reg = dst_reg,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
),
.immediate_bytes = 4,
.immediate = imm,
});
} else {
const src_reg = try self.copyToTmpRegister(src, dst_ty, src_mcv);
return self.genX8664Imul(src, dst_ty, dst_mcv, MCValue{ .register = src_reg });
}
},
.embedded_in_code, .memory, .stack_offset => {
return self.fail(src, "TODO implement x86 multiply source memory", .{});
},
.compare_flags_unsigned => {
return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
},
.compare_flags_signed => {
return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
},
}
},
.stack_offset => |off| {
switch (src_mcv) {
.none => unreachable,
.undef => return self.genSetStack(src, dst_ty, off, .undef),
.dead, .unreach => unreachable,
.ptr_stack_offset => unreachable,
.ptr_embedded_in_code => unreachable,
.register => |src_reg| {
// copy dst to a register
const dst_reg = try self.copyToTmpRegister(src, dst_ty, dst_mcv);
// multiply into dst_reg
// register, register
// Use the following imul opcode
// 0F AF /r: IMUL r32/64, r/m32/64
try self.encodeX8664Instruction(src, Instruction{
.operand_size_64 = dst_ty.abiSize(self.target.*) == 64,
.primary_opcode_2b = 0xaf,
// TODO: Explicit optional wrap due to stage 1 miscompilation :(
// https://github.com/ziglang/zig/issues/6515
.modrm = @as(
?Instruction.ModrmEffectiveAddress,
Instruction.ModrmEffectiveAddress{ .reg = src_reg },
),
.reg = dst_reg,
});
// copy dst_reg back out
return self.genSetStack(src, dst_ty, off, MCValue{ .register = dst_reg });
},
.immediate => |imm| {
return self.fail(src, "TODO implement x86 multiply source immediate", .{});
},
.embedded_in_code, .memory, .stack_offset => {
return self.fail(src, "TODO implement x86 multiply source memory", .{});
},
.compare_flags_unsigned => {
return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
},
.compare_flags_signed => {
return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
},
}
},
.embedded_in_code, .memory => {
return self.fail(src, "TODO implement x86 multiply destination memory", .{});
},
}
}
fn genX8664ModRMRegToStack(self: *Self, src: LazySrcLoc, ty: Type, off: u32, reg: Register, opcode: u8) !void {
const abi_size = ty.abiSize(self.target.*);
const adj_off = off + abi_size;

@@ -358,6 +358,81 @@ pub fn addCases(ctx: *TestContext) !void {
, &[_][]const u8{":2:15: error: incompatible types: 'bool' and 'comptime_int'"});
}
{
var case = ctx.exe("multiplying numbers at runtime and comptime", linux_x64);
case.addCompareOutput(
\\export fn _start() noreturn {
\\ mul(3, 4);
\\
\\ exit();
\\}
\\
\\fn mul(a: u32, b: u32) void {
\\ if (a * b != 12) unreachable;
\\}
\\
\\fn exit() noreturn {
\\ asm volatile ("syscall"
\\ :
\\ : [number] "{rax}" (231),
\\ [arg1] "{rdi}" (0)
\\ : "rcx", "r11", "memory"
\\ );
\\ unreachable;
\\}
,
"",
);
// comptime function call
case.addCompareOutput(
\\export fn _start() noreturn {
\\ exit();
\\}
\\
\\fn mul(a: u32, b: u32) u32 {
\\ return a * b;
\\}
\\
\\const x = mul(3, 4);
\\
\\fn exit() noreturn {
\\ asm volatile ("syscall"
\\ :
\\ : [number] "{rax}" (231),
\\ [arg1] "{rdi}" (x - 12)
\\ : "rcx", "r11", "memory"
\\ );
\\ unreachable;
\\}
,
"",
);
// Inline function call
case.addCompareOutput(
\\export fn _start() noreturn {
\\ var x: usize = 5;
\\ const y = mul(2, 3, x);
\\ exit(y - 30);
\\}
\\
\\fn mul(a: usize, b: usize, c: usize) callconv(.Inline) usize {
\\ return a * b * c;
\\}
\\
\\fn exit(code: usize) noreturn {
\\ asm volatile ("syscall"
\\ :
\\ : [number] "{rax}" (231),
\\ [arg1] "{rdi}" (code)
\\ : "rcx", "r11", "memory"
\\ );
\\ unreachable;
\\}
,
"",
);
}
{
var case = ctx.exe("assert function", linux_x64);
case.addCompareOutput(
@@ -741,6 +816,7 @@ pub fn addCases(ctx: *TestContext) !void {
case.addCompareOutput(
\\export fn _start() noreturn {
\\ assert(add(3, 4) == 1221);
\\ assert(mul(3, 4) == 21609);
\\
\\ exit();
\\}
@@ -774,6 +850,32 @@ pub fn addCases(ctx: *TestContext) !void {
\\ return z;
\\}
\\
\\fn mul(a: u32, b: u32) u32 {
\\ const x: u32 = blk: {
\\ const c = a * a * a * a; // 81
\\ const d = a * a * a * b; // 108
\\ const e = a * a * b * a; // 108
\\ const f = a * a * b * b; // 144
\\ const g = a * b * a * a; // 108
\\ const h = a * b * a * b; // 144
\\ const i = a * b * b * a; // 144
\\ const j = a * b * b * b; // 192
\\ const k = b * a * a * a; // 108
\\ const l = b * a * a * b; // 144
\\ const m = b * a * b * a; // 144
\\ const n = b * a * b * b; // 192
\\ const o = b * b * a * a; // 144
\\ const p = b * b * a * b; // 192
\\ const q = b * b * b * a; // 192
\\ const r = b * b * b * b; // 256
\\ const s = c + d + e + f + g + h + i + j + k + l + m + n + o + p + q + r; // 2401
\\ break :blk s;
\\ };
\\ const y = x * a; // 7203
\\ const z = y * a; // 21609
\\ return z;
\\}
\\
\\pub fn assert(ok: bool) void {
\\ if (!ok) unreachable; // assertion failure
\\}