mirror of
https://github.com/ziglang/zig.git
synced 2026-02-13 04:48:20 +00:00
x86_64: optimize wide mul with overflow
This commit is contained in:
parent
3c2636a83d
commit
c81878978a
@ -2520,7 +2520,7 @@ fn genSetFrameTruncatedOverflowCompare(
|
||||
tuple_ty: Type,
|
||||
frame_index: FrameIndex,
|
||||
src_mcv: MCValue,
|
||||
cc: Condition,
|
||||
overflow_cc: ?Condition,
|
||||
) !void {
|
||||
const src_lock = switch (src_mcv) {
|
||||
.register => |reg| self.register_manager.lockReg(reg),
|
||||
@ -2551,7 +2551,7 @@ fn genSetFrameTruncatedOverflowCompare(
|
||||
defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const overflow_reg = temp_regs[0];
|
||||
try self.asmSetccRegister(overflow_reg.to8(), cc);
|
||||
if (overflow_cc) |cc| try self.asmSetccRegister(overflow_reg.to8(), cc);
|
||||
|
||||
const scratch_reg = temp_regs[1];
|
||||
const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8;
|
||||
@ -2564,8 +2564,10 @@ fn genSetFrameTruncatedOverflowCompare(
|
||||
try self.genBinOpMir(.cmp, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv);
|
||||
|
||||
const eq_reg = temp_regs[2];
|
||||
try self.asmSetccRegister(eq_reg.to8(), .ne);
|
||||
try self.genBinOpMir(.@"or", Type.u8, .{ .register = overflow_reg }, .{ .register = eq_reg });
|
||||
if (overflow_cc) |_| {
|
||||
try self.asmSetccRegister(eq_reg.to8(), .ne);
|
||||
try self.genBinOpMir(.@"or", Type.u8, .{ .register = overflow_reg }, .{ .register = eq_reg });
|
||||
}
|
||||
|
||||
const payload_off = @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*));
|
||||
if (hi_limb_off > 0) try self.genSetMem(.{ .frame = frame_index }, payload_off, rest_ty, src_mcv);
|
||||
@ -2579,7 +2581,7 @@ fn genSetFrameTruncatedOverflowCompare(
|
||||
.{ .frame = frame_index },
|
||||
@intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
|
||||
tuple_ty.structFieldType(1),
|
||||
.{ .register = overflow_reg.to8() },
|
||||
if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne },
|
||||
);
|
||||
}
|
||||
|
||||
@ -2654,27 +2656,36 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
|
||||
try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
|
||||
break :result .{ .load_frame = .{ .index = frame_index } };
|
||||
},
|
||||
// For now, this is the only supported multiply that doesn't fit in a register.
|
||||
else => assert(dst_info.bits <= 128 and src_pl.data == 64),
|
||||
}
|
||||
else => {
|
||||
// For now, this is the only supported multiply that doesn't fit in a register,
|
||||
// so cc being set is impossible.
|
||||
|
||||
const frame_index =
|
||||
try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
|
||||
if (dst_info.bits >= lhs_active_bits + rhs_active_bits) {
|
||||
try self.genSetMem(
|
||||
.{ .frame = frame_index },
|
||||
@intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
|
||||
tuple_ty.structFieldType(0),
|
||||
partial_mcv,
|
||||
);
|
||||
try self.genSetMem(
|
||||
.{ .frame = frame_index },
|
||||
@intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
|
||||
tuple_ty.structFieldType(1),
|
||||
.{ .immediate = 0 }, // overflow is impossible for 64-bit*64-bit -> 128-bit
|
||||
);
|
||||
} else try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc);
|
||||
break :result .{ .load_frame = .{ .index = frame_index } };
|
||||
assert(dst_info.bits <= 128 and src_pl.data == 64);
|
||||
|
||||
const frame_index =
|
||||
try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
|
||||
if (dst_info.bits >= lhs_active_bits + rhs_active_bits) {
|
||||
try self.genSetMem(
|
||||
.{ .frame = frame_index },
|
||||
@intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
|
||||
tuple_ty.structFieldType(0),
|
||||
partial_mcv,
|
||||
);
|
||||
try self.genSetMem(
|
||||
.{ .frame = frame_index },
|
||||
@intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
|
||||
tuple_ty.structFieldType(1),
|
||||
.{ .immediate = 0 },
|
||||
);
|
||||
} else try self.genSetFrameTruncatedOverflowCompare(
|
||||
tuple_ty,
|
||||
frame_index,
|
||||
partial_mcv,
|
||||
null,
|
||||
);
|
||||
break :result .{ .load_frame = .{ .index = frame_index } };
|
||||
},
|
||||
}
|
||||
},
|
||||
else => unreachable,
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user