stage2 ARM: implement mul_with_overflow for ints <= 16 bits

2026-02-20 00:08:56 +00:00 · 2022-03-31 18:25:53 +02:00 · 2022-03-31 18:25:53 +02:00 · c4778fc029
commit c4778fc029
parent 77e70189f4
5 changed files with 120 additions and 5 deletions
--- a/src/arch/arm/CodeGen.zig
+++ b/src/arch/arm/CodeGen.zig
@ -1452,8 +1452,63 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void {
 }

 fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { // Lowers AIR mul_with_overflow; only scalar ints of <= 16 bits are implemented so far.
-    _ = inst;
-    return self.fail("TODO implement airMulWithOverflow for {}", .{self.target.cpu.arch});
+    const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
+    const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; // extra.lhs / extra.rhs are the two factors
+    if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ extra.lhs, extra.rhs, .none }); // unused result: mark dead, emit no code
+    const result: MCValue = result: {
+        const lhs = try self.resolveInst(extra.lhs);
+        const rhs = try self.resolveInst(extra.rhs);
+        const lhs_ty = self.air.typeOf(extra.lhs);
+        const rhs_ty = self.air.typeOf(extra.rhs);
+
+        const tuple_ty = self.air.typeOfIndex(inst); // type of the whole result; field 1 is used for the overflow bit below
+        const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*));
+        const tuple_align = tuple_ty.abiAlignment(self.target.*);
+        const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, self.target.*));
+
+        switch (lhs_ty.zigTypeTag()) {
+            .Vector => return self.fail("TODO implement mul_with_overflow for vectors", .{}),
+            .Int => {
+                assert(lhs_ty.eql(rhs_ty, self.target.*)); // both factors must have the same integer type
+                const int_info = lhs_ty.intInfo(self.target.*);
+                if (int_info.bits <= 16) { // NOTE(review): presumably chosen because the full product then fits in one 32-bit register — confirm
+                    const stack_offset = try self.allocMem(inst, tuple_size, tuple_align); // stack slot for the result tuple
+
+                    try self.spillCompareFlagsIfOccupied(); // the cmp emitted below writes the condition flags; presumably this saves any value still tracked in them
+                    self.compare_flags_inst = null;
+
+                    const base_tag: Mir.Inst.Tag = switch (int_info.signedness) {
+                        .signed => .smulbb, // signed multiply of the bottom halfwords of both operands
+                        .unsigned => .mul,
+                    };
+
+                    const dest = try self.binOpRegister(base_tag, null, lhs, rhs, lhs_ty, rhs_ty); // dest holds the untruncated product
+                    const dest_reg = dest.register;
+                    self.register_manager.freezeRegs(&.{dest_reg}); // presumably keeps dest_reg from being handed out by the allocReg below
+                    defer self.register_manager.unfreezeRegs(&.{dest_reg});
+
+                    const truncated_reg = try self.register_manager.allocReg(null);
+                    self.register_manager.freezeRegs(&.{truncated_reg});
+                    defer self.register_manager.unfreezeRegs(&.{truncated_reg});
+
+                    // sbfx/ubfx truncated, dest, #0, #bits (product truncated to the operand type's signedness and bit width)
+                    try self.truncRegister(dest_reg, truncated_reg, int_info.signedness, int_info.bits);
+
+                    // cmp dest, truncated (the `neq` outcome of this compare is what gets stored as the overflow bit below)
+                    _ = try self.binOp(.cmp_eq, null, dest, .{ .register = truncated_reg }, Type.usize, Type.usize);
+
+                    try self.genSetStack(lhs_ty, stack_offset, .{ .register = truncated_reg }); // truncated product goes at the base of the tuple slot
+                    try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .compare_flags_unsigned = .neq }); // overflow bit goes at the field-1 offset
+
+                    break :result MCValue{ .stack_offset = stack_offset };
+                } else {
+                    return self.fail("TODO ARM overflow operations on integers > u16/i16", .{});
+                }
+            },
+            else => unreachable, // only .Vector and .Int operand types are expected for this instruction
+        }
+    };
+    return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none });
 }

 fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
@ -2382,7 +2437,9 @@ fn binOpRegister(
            .rm = lhs_reg,
            .shift_amount = Instruction.ShiftAmount.reg(rhs_reg),
        } },
-        .mul => .{ .rrr = .{
+        .mul,
+        .smulbb,
+        => .{ .rrr = .{
            .rd = dest_reg,
            .rn = lhs_reg,
            .rm = rhs_reg,
--- a/src/arch/arm/Emit.zig
+++ b/src/arch/arm/Emit.zig
@ -122,7 +122,7 @@ pub fn emitMir(
            .ldrsh_stack_argument => try emit.mirLoadStackArgument(inst),

            .ldrh => try emit.mirLoadStoreExtra(inst),
-            .ldrsb => try emit.mirLoadStore(inst),
+            .ldrsb => try emit.mirLoadStoreExtra(inst),
            .ldrsh => try emit.mirLoadStoreExtra(inst),
            .strh => try emit.mirLoadStoreExtra(inst),

@ -130,6 +130,7 @@ pub fn emitMir(
            .movt => try emit.mirSpecialMove(inst),

            .mul => try emit.mirMultiply(inst),
+            .smulbb => try emit.mirMultiply(inst),

            .nop => try emit.mirNop(),

@ -689,6 +690,7 @@ fn mirMultiply(emit: *Emit, inst: Mir.Inst.Index) !void {

    switch (tag) {
        .mul => try emit.writeInstruction(Instruction.mul(cond, rrr.rd, rrr.rn, rrr.rm)),
+        .smulbb => try emit.writeInstruction(Instruction.smulbb(cond, rrr.rd, rrr.rn, rrr.rm)),
        else => unreachable,
    }
 }
--- a/src/arch/arm/Mir.zig
+++ b/src/arch/arm/Mir.zig
@ -102,6 +102,8 @@ pub const Inst = struct {
        rsb,
        /// Signed Bit Field Extract
        sbfx,
+        /// Signed Multiply (halfwords), bottom half, bottom half
+        smulbb,
        /// Store Register
        str,
        /// Store Register Byte
--- a/src/arch/arm/bits.zig
+++ b/src/arch/arm/bits.zig
@ -216,6 +216,18 @@ pub const Instruction = union(enum) {
        fixed_2: u5 = 0b00001,
        cond: u4,
    },
+    signed_multiply_halfwords: packed struct { // 32-bit encoding of the signed halfword multiplies (smulbb/smulbt/smultb/smultt); fields run from bit 0 upward
+        rn: u4, // bits 3:0
+        fixed_1: u1 = 0b0, // bit 4
+        n: u1, // bit 5; the smulXY constructors pass 0 for "b" and 1 for "t" in the first suffix position
+        m: u1, // bit 6; same, for the second suffix position
+        fixed_2: u1 = 0b1, // bit 7
+        rm: u4, // bits 11:8
+        fixed_3: u4 = 0b0000, // bits 15:12
+        rd: u4, // bits 19:16
+        fixed_4: u8 = 0b00010110, // bits 27:20
+        cond: u4, // bits 31:28
+    },
    integer_saturating_arithmetic: packed struct {
        rm: u4,
        fixed_1: u8 = 0b0000_0101,
@ -592,6 +604,7 @@ pub const Instruction = union(enum) {
            .data_processing => |v| @bitCast(u32, v),
            .multiply => |v| @bitCast(u32, v),
            .multiply_long => |v| @bitCast(u32, v),
+            .signed_multiply_halfwords => |v| @bitCast(u32, v),
            .integer_saturating_arithmetic => |v| @bitCast(u32, v),
            .bit_field_extract => |v| @bitCast(u32, v),
            .single_data_transfer => |v| @bitCast(u32, v),
@ -691,6 +704,26 @@ pub const Instruction = union(enum) {
        };
    }

+    fn signedMultiplyHalfwords( // Shared constructor behind smulbb/smulbt/smultb/smultt
+        n: u1, // stored in the `n` bit of the encoding
+        m: u1, // stored in the `m` bit of the encoding
+        cond: Condition,
+        rd: Register,
+        rn: Register,
+        rm: Register,
+    ) Instruction {
+        return Instruction{
+            .signed_multiply_halfwords = .{ // the fixed_* fields keep their declared default values
+                .rn = rn.id(),
+                .n = n,
+                .m = m,
+                .rm = rm.id(),
+                .rd = rd.id(),
+                .cond = @enumToInt(cond),
+            },
+        };
+    }
+
    fn integerSaturationArithmetic(
        cond: Condition,
        rd: Register,
@ -1093,6 +1126,24 @@ pub const Instruction = union(enum) {
        return multiplyLong(cond, 1, 1, 1, rdhi, rdlo, rm, rn);
    }

+    // Signed Multiply (halfwords): the two suffix letters become the `n` and `m` arguments of signedMultiplyHalfwords, with b (bottom half) = 0 and t = 1
+
+    pub fn smulbb(cond: Condition, rd: Register, rn: Register, rm: Register) Instruction {
+        return signedMultiplyHalfwords(0, 0, cond, rd, rn, rm); // n = 0 (b), m = 0 (b)
+    }
+
+    pub fn smulbt(cond: Condition, rd: Register, rn: Register, rm: Register) Instruction {
+        return signedMultiplyHalfwords(0, 1, cond, rd, rn, rm); // n = 0 (b), m = 1 (t)
+    }
+
+    pub fn smultb(cond: Condition, rd: Register, rn: Register, rm: Register) Instruction {
+        return signedMultiplyHalfwords(1, 0, cond, rd, rn, rm); // n = 1 (t), m = 0 (b)
+    }
+
+    pub fn smultt(cond: Condition, rd: Register, rn: Register, rm: Register) Instruction {
+        return signedMultiplyHalfwords(1, 1, cond, rd, rn, rm); // n = 1 (t), m = 1 (t)
+    }
+
    // Bit field extract

    pub fn ubfx(cond: Condition, rd: Register, rn: Register, lsb: u5, width: u6) Instruction {
@ -1440,6 +1491,10 @@ test "serialize instructions" {
            .inst = Instruction.qadd(.al, .r0, .r7, .r8),
            .expected = 0b1110_00010_00_0_1000_0000_0000_0101_0111,
        },
+        .{ // smulbt r0, r0, r0
+            .inst = Instruction.smulbt(.al, .r0, .r0, .r0),
+            .expected = 0b1110_00010110_0000_0000_0000_1_1_0_0_0000,
+        },
    };

    for (testcases) |case| {
--- a/test/behavior/math.zig
+++ b/test/behavior/math.zig
@ -678,7 +678,6 @@ test "small int addition" {
 test "@mulWithOverflow" {
    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO

    var result: u8 = undefined;
    try expect(@mulWithOverflow(u8, 86, 3, &result));