mirror of
https://github.com/ziglang/zig.git
synced 2025-12-29 17:43:17 +00:00
stage2 x86_64: implement integer mul
This was also an experiment to see if it were easier to implement a new feature when using the instruction encoder. Verdict: It's not that much easier, but I think it's certainly much more readable, because the description of the Instruction annotates what each field means. Right now, precise knowledge of x86_64 instructions is still required because things like when to set the 64-bit flag, how to read x86_64 instruction references, etc. are still not automatically done for you. In the future, this interface might make it slightly easier to write an assembler for x86_64, by abstracting the bit-fiddling aspects of instruction encoding.
This commit is contained in:
parent
5bd464e386
commit
c4b83ea021
@ -4330,6 +4330,33 @@ pub fn intSub(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies two integer values and returns the product as a big-int `Value`:
/// `int_big_positive` when the product is non-negative, `int_big_negative`
/// otherwise.
///
/// The limbs holding the product are allocated with `allocator` and handed to
/// the returned value's `create` call; the scratch buffer needed by the
/// multiplication is always freed before returning. If any allocation fails,
/// the error is returned and the product limbs are released.
pub fn intMul(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
    // TODO is this a performance issue? maybe we should try the operation without
    // resorting to BigInt first.
    var lhs_space: Value.BigIntSpace = undefined;
    var rhs_space: Value.BigIntSpace = undefined;
    const lhs_bigint = lhs.toBigInt(&lhs_space);
    const rhs_bigint = rhs.toBigInt(&rhs_space);

    const limbs = try allocator.alloc(
        std.math.big.Limb,
        lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1,
    );
    // Do not leak the product storage if a later allocation fails.
    errdefer allocator.free(limbs);
    var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };

    const limbs_buffer = try allocator.alloc(
        std.math.big.Limb,
        std.math.big.int.calcMulLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
    );
    defer allocator.free(limbs_buffer);

    result_bigint.mul(lhs_bigint, rhs_bigint, limbs_buffer, allocator);
    // Only the first `len` limbs are significant after the multiplication.
    const result_limbs = result_bigint.limbs[0..result_bigint.len];

    if (result_bigint.positive) {
        return Value.Tag.int_big_positive.create(allocator, result_limbs);
    } else {
        return Value.Tag.int_big_negative.create(allocator, result_limbs);
    }
}
|
||||
|
||||
pub fn floatAdd(
|
||||
arena: *Allocator,
|
||||
float_type: Type,
|
||||
@ -4396,6 +4423,39 @@ pub fn floatSub(
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiplies two float values and returns the product as a new `Value`
/// created with `arena`.
///
/// `float_type` selects the precision of the computation: `f32` and `f64`
/// multiply at their own width, while `f128`, `comptime_float` and
/// `c_longdouble` all multiply as `f128`. `f16` is not supported yet and
/// panics. Any other type tag is treated as unreachable.
/// `src` is not used by the current implementation.
pub fn floatMul(
    arena: *Allocator,
    float_type: Type,
    src: LazySrcLoc,
    lhs: Value,
    rhs: Value,
) !Value {
    switch (float_type.tag()) {
        .f16 => {
            // Once the missing compiler-rt routine exists this should mirror
            // the other branches: convert both operands with `toFloat(f16)`
            // and create a `float_16` value from their product.
            @panic("TODO add __trunctfhf2 to compiler-rt");
        },
        .f32 => {
            const product = lhs.toFloat(f32) * rhs.toFloat(f32);
            return Value.Tag.float_32.create(arena, product);
        },
        .f64 => {
            const product = lhs.toFloat(f64) * rhs.toFloat(f64);
            return Value.Tag.float_64.create(arena, product);
        },
        .f128, .comptime_float, .c_longdouble => {
            const product = lhs.toFloat(f128) * rhs.toFloat(f128);
            return Value.Tag.float_128.create(arena, product);
        },
        else => unreachable,
    }
}
|
||||
|
||||
pub fn simplePtrType(
|
||||
mod: *Module,
|
||||
arena: *Allocator,
|
||||
|
||||
@ -3885,6 +3885,13 @@ fn analyzeArithmetic(
|
||||
try Module.floatSub(sema.arena, scalar_type, src, lhs_val, rhs_val);
|
||||
break :blk val;
|
||||
},
|
||||
.mul => blk: {
|
||||
const val = if (is_int)
|
||||
try Module.intMul(sema.arena, lhs_val, rhs_val)
|
||||
else
|
||||
try Module.floatMul(sema.arena, scalar_type, src, lhs_val, rhs_val);
|
||||
break :blk val;
|
||||
},
|
||||
else => return sema.mod.fail(&block.base, src, "TODO Implement arithmetic operand '{s}'", .{@tagName(zir_tag)}),
|
||||
};
|
||||
|
||||
|
||||
149
src/codegen.zig
149
src/codegen.zig
@ -1079,6 +1079,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
|
||||
if (inst.base.isUnused())
|
||||
return MCValue.dead;
|
||||
switch (arch) {
|
||||
.x86_64 => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
|
||||
.arm, .armeb => return try self.genArmMul(&inst.base, inst.lhs, inst.rhs),
|
||||
else => return self.fail(inst.base.src, "TODO implement mul for {}", .{self.target.cpu.arch}),
|
||||
}
|
||||
@ -1574,6 +1575,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
|
||||
.sub, .subwrap => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 5, 0x28),
|
||||
.xor, .not => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 6, 0x30),
|
||||
|
||||
.mul, .mulwrap => try self.genX8664Imul(inst.src, inst.ty, dst_mcv, src_mcv),
|
||||
else => unreachable,
|
||||
}
|
||||
|
||||
@ -1795,6 +1797,153 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
|
||||
}
|
||||
}
|
||||
|
||||
/// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
///
/// Supported combinations:
///  * register destination with a register or immediate source;
///  * stack destination with a register source (the destination is copied to
///    a temporary register, multiplied there, and stored back).
/// An `undef` source simply sets the destination to `undef`. Memory operands,
/// compare-flag sources and an immediate source with a stack destination are
/// reported through `self.fail` as not yet implemented.
fn genX8664Imul(
    self: *Self,
    src: LazySrcLoc,
    dst_ty: Type,
    dst_mcv: MCValue,
    src_mcv: MCValue,
) !void {
    // The ABI size is a byte count, so a 64-bit operand is 8 bytes wide.
    // Comparing against 64 would never select the 64-bit operand size and
    // would silently truncate 64-bit products to 32 bits.
    const operand_size_64 = dst_ty.abiSize(self.target.*) == 8;

    switch (dst_mcv) {
        .none => unreachable,
        .undef => unreachable,
        .dead, .unreach, .immediate => unreachable,
        .compare_flags_unsigned => unreachable,
        .compare_flags_signed => unreachable,
        .ptr_stack_offset => unreachable,
        .ptr_embedded_in_code => unreachable,
        .register => |dst_reg| {
            switch (src_mcv) {
                .none => unreachable,
                .undef => try self.genSetReg(src, dst_ty, dst_reg, .undef),
                .dead, .unreach => unreachable,
                .ptr_stack_offset => unreachable,
                .ptr_embedded_in_code => unreachable,
                .register => |src_reg| {
                    // register, register
                    //
                    // Use the following imul opcode
                    // 0F AF /r: IMUL r32/64, r/m32/64
                    try self.encodeX8664Instruction(src, Instruction{
                        .operand_size_64 = operand_size_64,
                        .primary_opcode_2b = 0xaf,
                        // TODO: Explicit optional wrap due to stage 1 miscompilation :(
                        // https://github.com/ziglang/zig/issues/6515
                        .modrm = @as(
                            ?Instruction.ModrmEffectiveAddress,
                            Instruction.ModrmEffectiveAddress{ .reg = src_reg },
                        ),
                        .reg = dst_reg,
                    });
                },
                .immediate => |imm| {
                    // register, immediate:
                    // depends on size of immediate.
                    //
                    // immediate fits in i8:
                    // 6B /r ib: IMUL r32/64, r/m32/64, imm8
                    //
                    // immediate fits in i32:
                    // 69 /r id: IMUL r32/64, r/m32/64, imm32
                    //
                    // immediate is huge:
                    // split into 2 instructions
                    // 1) copy the 64 bit immediate into a tmp register
                    // 2) perform register,register mul
                    // 0F AF /r: IMUL r32/64, r/m32/64
                    if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
                        try self.encodeX8664Instruction(src, Instruction{
                            .operand_size_64 = operand_size_64,
                            .primary_opcode_1b = 0x6B,
                            .reg = dst_reg,
                            // TODO: Explicit optional wrap due to stage 1 miscompilation :(
                            // https://github.com/ziglang/zig/issues/6515
                            .modrm = @as(
                                ?Instruction.ModrmEffectiveAddress,
                                Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
                            ),
                            .immediate_bytes = 1,
                            .immediate = imm,
                        });
                    } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
                        try self.encodeX8664Instruction(src, Instruction{
                            .operand_size_64 = operand_size_64,
                            .primary_opcode_1b = 0x69,
                            .reg = dst_reg,
                            // TODO: Explicit optional wrap due to stage 1 miscompilation :(
                            // https://github.com/ziglang/zig/issues/6515
                            .modrm = @as(
                                ?Instruction.ModrmEffectiveAddress,
                                Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
                            ),
                            .immediate_bytes = 4,
                            .immediate = imm,
                        });
                    } else {
                        // Too large for an immediate operand: materialize it in a
                        // temporary register and retry as register, register.
                        const src_reg = try self.copyToTmpRegister(src, dst_ty, src_mcv);
                        return self.genX8664Imul(src, dst_ty, dst_mcv, MCValue{ .register = src_reg });
                    }
                },
                .embedded_in_code, .memory, .stack_offset => {
                    return self.fail(src, "TODO implement x86 multiply source memory", .{});
                },
                .compare_flags_unsigned => {
                    return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
                },
                .compare_flags_signed => {
                    return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
                },
            }
        },
        .stack_offset => |off| {
            switch (src_mcv) {
                .none => unreachable,
                .undef => return self.genSetStack(src, dst_ty, off, .undef),
                .dead, .unreach => unreachable,
                .ptr_stack_offset => unreachable,
                .ptr_embedded_in_code => unreachable,
                .register => |src_reg| {
                    // copy dst to a register
                    const dst_reg = try self.copyToTmpRegister(src, dst_ty, dst_mcv);
                    // multiply into dst_reg
                    // register, register
                    // Use the following imul opcode
                    // 0F AF /r: IMUL r32/64, r/m32/64
                    try self.encodeX8664Instruction(src, Instruction{
                        .operand_size_64 = operand_size_64,
                        .primary_opcode_2b = 0xaf,
                        // TODO: Explicit optional wrap due to stage 1 miscompilation :(
                        // https://github.com/ziglang/zig/issues/6515
                        .modrm = @as(
                            ?Instruction.ModrmEffectiveAddress,
                            Instruction.ModrmEffectiveAddress{ .reg = src_reg },
                        ),
                        .reg = dst_reg,
                    });
                    // copy dst_reg back out
                    return self.genSetStack(src, dst_ty, off, MCValue{ .register = dst_reg });
                },
                .immediate => {
                    return self.fail(src, "TODO implement x86 multiply source immediate", .{});
                },
                .embedded_in_code, .memory, .stack_offset => {
                    return self.fail(src, "TODO implement x86 multiply source memory", .{});
                },
                .compare_flags_unsigned => {
                    return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
                },
                .compare_flags_signed => {
                    return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
                },
            }
        },
        .embedded_in_code, .memory => {
            return self.fail(src, "TODO implement x86 multiply destination memory", .{});
        },
    }
}
|
||||
|
||||
fn genX8664ModRMRegToStack(self: *Self, src: LazySrcLoc, ty: Type, off: u32, reg: Register, opcode: u8) !void {
|
||||
const abi_size = ty.abiSize(self.target.*);
|
||||
const adj_off = off + abi_size;
|
||||
|
||||
@ -358,6 +358,81 @@ pub fn addCases(ctx: *TestContext) !void {
|
||||
, &[_][]const u8{":2:15: error: incompatible types: 'bool' and 'comptime_int'"});
|
||||
}
|
||||
|
||||
{
|
||||
var case = ctx.exe("multiplying numbers at runtime and comptime", linux_x64);
|
||||
case.addCompareOutput(
|
||||
\\export fn _start() noreturn {
|
||||
\\ mul(3, 4);
|
||||
\\
|
||||
\\ exit();
|
||||
\\}
|
||||
\\
|
||||
\\fn mul(a: u32, b: u32) void {
|
||||
\\ if (a * b != 12) unreachable;
|
||||
\\}
|
||||
\\
|
||||
\\fn exit() noreturn {
|
||||
\\ asm volatile ("syscall"
|
||||
\\ :
|
||||
\\ : [number] "{rax}" (231),
|
||||
\\ [arg1] "{rdi}" (0)
|
||||
\\ : "rcx", "r11", "memory"
|
||||
\\ );
|
||||
\\ unreachable;
|
||||
\\}
|
||||
,
|
||||
"",
|
||||
);
|
||||
// comptime function call
|
||||
case.addCompareOutput(
|
||||
\\export fn _start() noreturn {
|
||||
\\ exit();
|
||||
\\}
|
||||
\\
|
||||
\\fn mul(a: u32, b: u32) u32 {
|
||||
\\ return a * b;
|
||||
\\}
|
||||
\\
|
||||
\\const x = mul(3, 4);
|
||||
\\
|
||||
\\fn exit() noreturn {
|
||||
\\ asm volatile ("syscall"
|
||||
\\ :
|
||||
\\ : [number] "{rax}" (231),
|
||||
\\ [arg1] "{rdi}" (x - 12)
|
||||
\\ : "rcx", "r11", "memory"
|
||||
\\ );
|
||||
\\ unreachable;
|
||||
\\}
|
||||
,
|
||||
"",
|
||||
);
|
||||
// Inline function call
|
||||
case.addCompareOutput(
|
||||
\\export fn _start() noreturn {
|
||||
\\ var x: usize = 5;
|
||||
\\ const y = mul(2, 3, x);
|
||||
\\ exit(y - 30);
|
||||
\\}
|
||||
\\
|
||||
\\fn mul(a: usize, b: usize, c: usize) callconv(.Inline) usize {
|
||||
\\ return a * b * c;
|
||||
\\}
|
||||
\\
|
||||
\\fn exit(code: usize) noreturn {
|
||||
\\ asm volatile ("syscall"
|
||||
\\ :
|
||||
\\ : [number] "{rax}" (231),
|
||||
\\ [arg1] "{rdi}" (code)
|
||||
\\ : "rcx", "r11", "memory"
|
||||
\\ );
|
||||
\\ unreachable;
|
||||
\\}
|
||||
,
|
||||
"",
|
||||
);
|
||||
}
|
||||
|
||||
{
|
||||
var case = ctx.exe("assert function", linux_x64);
|
||||
case.addCompareOutput(
|
||||
@ -741,6 +816,7 @@ pub fn addCases(ctx: *TestContext) !void {
|
||||
case.addCompareOutput(
|
||||
\\export fn _start() noreturn {
|
||||
\\ assert(add(3, 4) == 1221);
|
||||
\\ assert(mul(3, 4) == 21609);
|
||||
\\
|
||||
\\ exit();
|
||||
\\}
|
||||
@ -774,6 +850,32 @@ pub fn addCases(ctx: *TestContext) !void {
|
||||
\\ return z;
|
||||
\\}
|
||||
\\
|
||||
\\fn mul(a: u32, b: u32) u32 {
|
||||
\\ const x: u32 = blk: {
|
||||
\\ const c = a * a * a * a; // 81
|
||||
\\ const d = a * a * a * b; // 108
|
||||
\\ const e = a * a * b * a; // 108
|
||||
\\ const f = a * a * b * b; // 144
|
||||
\\ const g = a * b * a * a; // 108
|
||||
\\ const h = a * b * a * b; // 144
|
||||
\\ const i = a * b * b * a; // 144
|
||||
\\ const j = a * b * b * b; // 192
|
||||
\\ const k = b * a * a * a; // 108
|
||||
\\ const l = b * a * a * b; // 144
|
||||
\\ const m = b * a * b * a; // 144
|
||||
\\ const n = b * a * b * b; // 192
|
||||
\\ const o = b * b * a * a; // 144
|
||||
\\ const p = b * b * a * b; // 192
|
||||
\\ const q = b * b * b * a; // 192
|
||||
\\ const r = b * b * b * b; // 256
|
||||
\\ const s = c + d + e + f + g + h + i + j + k + l + m + n + o + p + q + r; // 2401
|
||||
\\ break :blk s;
|
||||
\\ };
|
||||
\\ const y = x * a; // 7203
|
||||
\\ const z = y * a; // 21609
|
||||
\\ return z;
|
||||
\\}
|
||||
\\
|
||||
\\pub fn assert(ok: bool) void {
|
||||
\\ if (!ok) unreachable; // assertion failure
|
||||
\\}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user