mirror of
https://github.com/ziglang/zig.git
synced 2026-02-16 14:28:57 +00:00
llvm: convert @divFloor and @mod to forms llvm will recognize
On x86_64, the `@divFloor` change is a strict improvement, and the
`@mod` change adds one zero latency instruction. In return, once we
upgrade to LLVM 20, when the optimizer discovers one of these operations
has a power-of-two constant rhs, it will be able to optimize the entire
operation into an `ashr` or `and`, respectively.
#I CPL CPT
old `@divFloor` | 8 | 15 | .143 |
new `@divFloor` | 7 | 15 | .148 |
old `@mod` | 9 | 17 | .134 | (rip llvm
new `@mod` | 10 | 17 | .138 | scheduler)
This commit is contained in:
parent
b074fb7dda
commit
5cfcb01503
@ -8571,19 +8571,37 @@ pub const FuncGen = struct {
|
||||
}
|
||||
if (scalar_ty.isSignedInt(zcu)) {
|
||||
const inst_llvm_ty = try o.lowerType(inst_ty);
|
||||
const bit_size_minus_one = try o.builder.splatValue(inst_llvm_ty, try o.builder.intConst(
|
||||
|
||||
const ExpectedContents = [std.math.big.int.calcTwosCompLimbCount(256)]std.math.big.Limb;
|
||||
var stack align(@max(
|
||||
@alignOf(std.heap.StackFallbackAllocator(0)),
|
||||
@alignOf(ExpectedContents),
|
||||
)) = std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
|
||||
const allocator = stack.get();
|
||||
|
||||
const scalar_bits = inst_llvm_ty.scalarBits(&o.builder);
|
||||
var smin_big_int: std.math.big.int.Mutable = .{
|
||||
.limbs = try allocator.alloc(
|
||||
std.math.big.Limb,
|
||||
std.math.big.int.calcTwosCompLimbCount(scalar_bits),
|
||||
),
|
||||
.len = undefined,
|
||||
.positive = undefined,
|
||||
};
|
||||
defer allocator.free(smin_big_int.limbs);
|
||||
smin_big_int.setTwosCompIntLimit(.min, .signed, scalar_bits);
|
||||
const smin = try o.builder.splatValue(inst_llvm_ty, try o.builder.bigIntConst(
|
||||
inst_llvm_ty.scalarType(&o.builder),
|
||||
inst_llvm_ty.scalarBits(&o.builder) - 1,
|
||||
smin_big_int.toConst(),
|
||||
));
|
||||
|
||||
const div = try self.wip.bin(.sdiv, lhs, rhs, "");
|
||||
const rem = try self.wip.bin(.srem, lhs, rhs, "");
|
||||
const div_sign = try self.wip.bin(.xor, lhs, rhs, "");
|
||||
const div_sign_mask = try self.wip.bin(.ashr, div_sign, bit_size_minus_one, "");
|
||||
const zero = try o.builder.zeroInitValue(inst_llvm_ty);
|
||||
const rem_nonzero = try self.wip.icmp(.ne, rem, zero, "");
|
||||
const correction = try self.wip.select(.normal, rem_nonzero, div_sign_mask, zero, "");
|
||||
return self.wip.bin(.@"add nsw", div, correction, "");
|
||||
const div = try self.wip.bin(.sdiv, lhs, rhs, "divFloor.div");
|
||||
const rem = try self.wip.bin(.srem, lhs, rhs, "divFloor.rem");
|
||||
const rhs_sign = try self.wip.bin(.@"and", rhs, smin, "divFloor.rhs_sign");
|
||||
const rem_xor_rhs_sign = try self.wip.bin(.xor, rem, rhs_sign, "divFloor.rem_xor_rhs_sign");
|
||||
const need_correction = try self.wip.icmp(.ugt, rem_xor_rhs_sign, smin, "divFloor.need_correction");
|
||||
const correction = try self.wip.cast(.sext, need_correction, inst_llvm_ty, "divFloor.correction");
|
||||
return self.wip.bin(.@"add nsw", div, correction, "divFloor");
|
||||
}
|
||||
return self.wip.bin(.udiv, lhs, rhs, "");
|
||||
}
|
||||
@ -8642,19 +8660,36 @@ pub const FuncGen = struct {
|
||||
return self.wip.select(fast, ltz, c, a, "");
|
||||
}
|
||||
if (scalar_ty.isSignedInt(zcu)) {
|
||||
const bit_size_minus_one = try o.builder.splatValue(inst_llvm_ty, try o.builder.intConst(
|
||||
const ExpectedContents = [std.math.big.int.calcTwosCompLimbCount(256)]std.math.big.Limb;
|
||||
var stack align(@max(
|
||||
@alignOf(std.heap.StackFallbackAllocator(0)),
|
||||
@alignOf(ExpectedContents),
|
||||
)) = std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
|
||||
const allocator = stack.get();
|
||||
|
||||
const scalar_bits = inst_llvm_ty.scalarBits(&o.builder);
|
||||
var smin_big_int: std.math.big.int.Mutable = .{
|
||||
.limbs = try allocator.alloc(
|
||||
std.math.big.Limb,
|
||||
std.math.big.int.calcTwosCompLimbCount(scalar_bits),
|
||||
),
|
||||
.len = undefined,
|
||||
.positive = undefined,
|
||||
};
|
||||
defer allocator.free(smin_big_int.limbs);
|
||||
smin_big_int.setTwosCompIntLimit(.min, .signed, scalar_bits);
|
||||
const smin = try o.builder.splatValue(inst_llvm_ty, try o.builder.bigIntConst(
|
||||
inst_llvm_ty.scalarType(&o.builder),
|
||||
inst_llvm_ty.scalarBits(&o.builder) - 1,
|
||||
smin_big_int.toConst(),
|
||||
));
|
||||
|
||||
const rem = try self.wip.bin(.srem, lhs, rhs, "");
|
||||
const div_sign = try self.wip.bin(.xor, lhs, rhs, "");
|
||||
const div_sign_mask = try self.wip.bin(.ashr, div_sign, bit_size_minus_one, "");
|
||||
const rhs_masked = try self.wip.bin(.@"and", rhs, div_sign_mask, "");
|
||||
const rem = try self.wip.bin(.srem, lhs, rhs, "mod.rem");
|
||||
const rhs_sign = try self.wip.bin(.@"and", rhs, smin, "mod.rhs_sign");
|
||||
const rem_xor_rhs_sign = try self.wip.bin(.xor, rem, rhs_sign, "mod.rem_xor_rhs_sign");
|
||||
const need_correction = try self.wip.icmp(.ugt, rem_xor_rhs_sign, smin, "mod.need_correction");
|
||||
const zero = try o.builder.zeroInitValue(inst_llvm_ty);
|
||||
const rem_nonzero = try self.wip.icmp(.ne, rem, zero, "");
|
||||
const correction = try self.wip.select(.normal, rem_nonzero, rhs_masked, zero, "");
|
||||
return self.wip.bin(.@"add nsw", rem, correction, "");
|
||||
const correction = try self.wip.select(.normal, need_correction, rhs, zero, "mod.correction");
|
||||
return self.wip.bin(.@"add nsw", correction, rem, "mod");
|
||||
}
|
||||
return self.wip.bin(.urem, lhs, rhs, "");
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user