x64: implement shl with overflow for non-pow-2

This commit is contained in:
Jakub Konka 2022-05-10 21:19:05 +02:00
parent d31875f7ab
commit 1d3b714125
2 changed files with 56 additions and 127 deletions

View File

@ -621,10 +621,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
.trunc_float,
=> try self.airUnaryMath(inst),
.add_with_overflow => try self.airAddSubWithOverflow(inst),
.sub_with_overflow => try self.airAddSubWithOverflow(inst),
.add_with_overflow => try self.airAddSubShlWithOverflow(inst),
.sub_with_overflow => try self.airAddSubShlWithOverflow(inst),
.mul_with_overflow => try self.airMulWithOverflow(inst),
.shl_with_overflow => try self.airShlWithOverflow(inst),
.shl_with_overflow => try self.airAddSubShlWithOverflow(inst),
.div_float, .div_trunc, .div_floor, .div_exact => try self.airMulDivBinOp(inst),
@ -1305,7 +1305,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
fn airAddSubShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
const tag = self.air.instructions.items(.tag)[inst];
const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
@ -1313,23 +1313,30 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
const ty = self.air.typeOf(bin_op.lhs);
const abi_size = ty.abiSize(self.target.*);
switch (ty.zigTypeTag()) {
.Vector => return self.fail("TODO implement add_with_overflow for Vector type", .{}),
.Vector => return self.fail("TODO implement add/sub/shl with overflow for Vector type", .{}),
.Int => {
if (abi_size > 8) {
return self.fail("TODO implement add_with_overflow for Ints larger than 64bits", .{});
return self.fail("TODO implement add/sub/shl with overflow for Ints larger than 64bits", .{});
}
try self.spillCompareFlagsIfOccupied();
if (tag == .shl_with_overflow) {
try self.spillRegisters(1, .{.rcx});
}
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
const base_tag: Air.Inst.Tag = switch (tag) {
.add_with_overflow => .add,
.sub_with_overflow => .sub,
const partial: MCValue = switch (tag) {
.add_with_overflow => try self.genBinOp(.add, null, lhs, rhs, ty, ty),
.sub_with_overflow => try self.genBinOp(.sub, null, lhs, rhs, ty, ty),
.shl_with_overflow => blk: {
const shift_ty = self.air.typeOf(bin_op.rhs);
break :blk try self.genShiftBinOp(.shl, null, lhs, rhs, ty, shift_ty);
},
else => unreachable,
};
const partial = try self.genBinOp(base_tag, null, lhs, rhs, ty, ty);
const int_info = ty.intInfo(self.target.*);
@ -1532,111 +1539,6 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
if (self.liveness.isUnused(inst)) {
return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
}
const lhs_ty = self.air.typeOf(bin_op.lhs);
const abi_size = lhs_ty.abiSize(self.target.*);
const rhs_ty = self.air.typeOf(bin_op.rhs);
const result: MCValue = result: {
switch (lhs_ty.zigTypeTag()) {
.Vector => return self.fail("TODO implement shl_with_overflow for Vector type", .{}),
.Int => {
if (abi_size > 8) {
return self.fail("TODO implement shl_with_overflow for Ints larger than 64bits", .{});
}
const int_info = lhs_ty.intInfo(self.target.*);
if (math.isPowerOfTwo(int_info.bits) and int_info.bits >= 8) {
try self.spillCompareFlagsIfOccupied();
self.compare_flags_inst = inst;
try self.spillRegisters(1, .{.rcx});
const lhs = try self.resolveInst(bin_op.lhs);
const rhs = try self.resolveInst(bin_op.rhs);
const partial = try self.genShiftBinOp(.shl, null, lhs, rhs, lhs_ty, rhs_ty);
break :result switch (int_info.signedness) {
.signed => MCValue{ .register_overflow_signed = partial.register },
.unsigned => MCValue{ .register_overflow_unsigned = partial.register },
};
}
return self.fail("TODO shl_with_overflow non-power-of-two", .{});
// try self.spillCompareFlagsIfOccupied();
// self.compare_flags_inst = null;
// const dst_reg: Register = dst_reg: {
// switch (int_info.signedness) {
// .signed => {
// const lhs = try self.resolveInst(bin_op.lhs);
// const rhs = try self.resolveInst(bin_op.rhs);
// const rhs_lock: ?RegisterLock = switch (rhs) {
// .register => |reg| self.register_manager.lockRegAssumeUnused(reg),
// else => null,
// };
// defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
// const dst_reg: Register = blk: {
// if (lhs.isRegister()) break :blk lhs.register;
// break :blk try self.copyToTmpRegister(ty, lhs);
// };
// const dst_reg_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
// defer self.register_manager.unlockReg(dst_reg_lock);
// const rhs_mcv: MCValue = blk: {
// if (rhs.isRegister() or rhs.isMemory()) break :blk rhs;
// break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs) };
// };
// const rhs_mcv_lock: ?RegisterLock = switch (rhs_mcv) {
// .register => |reg| self.register_manager.lockReg(reg),
// else => null,
// };
// defer if (rhs_mcv_lock) |lock| self.register_manager.unlockReg(lock);
// try self.genIntMulComplexOpMir(Type.isize, .{ .register = dst_reg }, rhs_mcv);
// break :dst_reg dst_reg;
// },
// .unsigned => {
// try self.spillRegisters(2, .{ .rax, .rdx });
// const lhs = try self.resolveInst(bin_op.lhs);
// const rhs = try self.resolveInst(bin_op.rhs);
// const dst_mcv = try self.genMulDivBinOp(.mul, null, ty, lhs, rhs);
// break :dst_reg dst_mcv.register;
// },
// }
// };
// const tuple_ty = self.air.typeOfIndex(inst);
// const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*));
// const tuple_align = tuple_ty.abiAlignment(self.target.*);
// const overflow_bit_offset = @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*));
// const stack_offset = @intCast(i32, try self.allocMem(inst, tuple_size, tuple_align));
// try self.genSetStackTruncatedOverflowCompare(ty, stack_offset, overflow_bit_offset, dst_reg);
// break :result MCValue{ .stack_offset = stack_offset };
},
else => unreachable,
}
};
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
/// Generates signed or unsigned integer multiplication/division.
/// Clobbers .rax and .rdx registers.
/// Quotient is saved in .rax and remainder in .rdx.

View File

@ -905,20 +905,47 @@ test "@subWithOverflow" {
test "@shlWithOverflow" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
var result: u16 = undefined;
try expect(@shlWithOverflow(u16, 0b0010111111111111, 3, &result));
try expect(result == 0b0111111111111000);
try expect(!@shlWithOverflow(u16, 0b0010111111111111, 2, &result));
try expect(result == 0b1011111111111100);
{
var result: u4 = undefined;
var a: u4 = 2;
var b: u2 = 1;
try expect(!@shlWithOverflow(u4, a, b, &result));
try expect(result == 4);
var a: u16 = 0b0000_0000_0000_0011;
var b: u4 = 15;
try expect(@shlWithOverflow(u16, a, b, &result));
try expect(result == 0b1000_0000_0000_0000);
b = 14;
try expect(!@shlWithOverflow(u16, a, b, &result));
try expect(result == 0b1100_0000_0000_0000);
b = 3;
try expect(@shlWithOverflow(u4, a, b, &result));
try expect(result == 0);
}
{
var result: i9 = undefined;
var a: i9 = 127;
var b: u4 = 1;
try expect(!@shlWithOverflow(i9, a, b, &result));
try expect(result == 254);
b = 2;
try expect(@shlWithOverflow(i9, a, b, &result));
try expect(result == -4);
}
{
var result: u16 = undefined;
try expect(@shlWithOverflow(u16, 0b0010111111111111, 3, &result));
try expect(result == 0b0111111111111000);
try expect(!@shlWithOverflow(u16, 0b0010111111111111, 2, &result));
try expect(result == 0b1011111111111100);
var a: u16 = 0b0000_0000_0000_0011;
var b: u4 = 15;
try expect(@shlWithOverflow(u16, a, b, &result));
try expect(result == 0b1000_0000_0000_0000);
b = 14;
try expect(!@shlWithOverflow(u16, a, b, &result));
try expect(result == 0b1100_0000_0000_0000);
}
}
test "overflow arithmetic with u0 values" {