mirror of
https://github.com/ziglang/zig.git
synced 2026-01-03 12:03:19 +00:00
std.math.big.int: Add Sqrt
Implemented with reference to Modern Computer Arithmetic, Algorithm 1.13. https://members.loria.fr/PZimmermann/mca/pub226.html The below optimization ideas are derived from Go's big package. * Minimize initial loop value * Reuse loop values math/big/int.go: https://cs.opensource.google/go/go/+/refs/tags/go1.20.4:src/math/big/int.go;l=1286
This commit is contained in:
parent
6e6a61a384
commit
4422af8be9
@ -66,6 +66,13 @@ pub fn calcPowLimbsBufferLen(a_bit_count: usize, y: usize) usize {
|
|||||||
return 2 + (a_bit_count * y + (limb_bits - 1)) / limb_bits;
|
return 2 + (a_bit_count * y + (limb_bits - 1)) / limb_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn calcSqrtLimbsBufferLen(a_bit_count: usize) usize {
|
||||||
|
const a_limb_count = (a_bit_count - 1) / limb_bits + 1;
|
||||||
|
const shift = (a_bit_count + 1) / 2;
|
||||||
|
const u_s_rem_limb_count = 1 + ((shift - 1) / limb_bits + 1);
|
||||||
|
return a_limb_count + 3 * u_s_rem_limb_count + calcDivLimbsBufferLen(a_limb_count, u_s_rem_limb_count);
|
||||||
|
}
|
||||||
|
|
||||||
// Compute the number of limbs required to store a 2s-complement number of `bit_count` bits.
|
// Compute the number of limbs required to store a 2s-complement number of `bit_count` bits.
|
||||||
pub fn calcTwosCompLimbCount(bit_count: usize) usize {
|
pub fn calcTwosCompLimbCount(bit_count: usize) usize {
|
||||||
return std.math.divCeil(usize, bit_count, @bitSizeOf(Limb)) catch unreachable;
|
return std.math.divCeil(usize, bit_count, @bitSizeOf(Limb)) catch unreachable;
|
||||||
@ -1344,6 +1351,64 @@ pub const Mutable = struct {
|
|||||||
r.positive = a.positive or (b & 1) == 0;
|
r.positive = a.positive or (b & 1) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// r = ⌊√a⌋
|
||||||
|
///
|
||||||
|
/// r may alias a.
|
||||||
|
///
|
||||||
|
/// Asserts that `r` has enough limbs to store the result. Upper bound is
|
||||||
|
/// `(a.limbs.len - 1) / 2 + 1`.
|
||||||
|
///
|
||||||
|
/// `limbs_buffer` is used for temporary storage.
|
||||||
|
/// The amount required is given by `calcSqrtLimbsBufferLen`.
|
||||||
|
pub fn sqrt(
|
||||||
|
r: *Mutable,
|
||||||
|
a: Const,
|
||||||
|
limbs_buffer: []Limb,
|
||||||
|
) void {
|
||||||
|
// Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 SqrtInt
|
||||||
|
// https://members.loria.fr/PZimmermann/mca/pub226.html
|
||||||
|
var buf_index: usize = 0;
|
||||||
|
var t = b: {
|
||||||
|
const start = buf_index;
|
||||||
|
buf_index += a.limbs.len;
|
||||||
|
break :b Mutable.init(limbs_buffer[start..buf_index], 0);
|
||||||
|
};
|
||||||
|
var u = b: {
|
||||||
|
const start = buf_index;
|
||||||
|
const shift = (a.bitCountAbs() + 1) / 2;
|
||||||
|
buf_index += 1 + ((shift - 1) / limb_bits + 1);
|
||||||
|
var m = Mutable.init(limbs_buffer[start..buf_index], 1);
|
||||||
|
m.shiftLeft(m.toConst(), shift); // u must be >= ⌊√a⌋, and should be as small as possible for efficiency
|
||||||
|
break :b m;
|
||||||
|
};
|
||||||
|
var s = b: {
|
||||||
|
const start = buf_index;
|
||||||
|
buf_index += u.limbs.len;
|
||||||
|
break :b u.toConst().toMutable(limbs_buffer[start..buf_index]);
|
||||||
|
};
|
||||||
|
var rem = b: {
|
||||||
|
const start = buf_index;
|
||||||
|
buf_index += s.limbs.len;
|
||||||
|
break :b Mutable.init(limbs_buffer[start..buf_index], 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
t.divFloor(&rem, a, s.toConst(), limbs_buffer[buf_index..]);
|
||||||
|
t.add(t.toConst(), s.toConst());
|
||||||
|
u.shiftRight(t.toConst(), 1);
|
||||||
|
|
||||||
|
if (u.toConst().order(s.toConst()).compare(.gte)) {
|
||||||
|
r.copy(s.toConst());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid copying u to s by swapping u and s
|
||||||
|
var tmp_s = s;
|
||||||
|
s = u;
|
||||||
|
u = tmp_s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// rma may not alias x or y.
|
/// rma may not alias x or y.
|
||||||
/// x and y may alias each other.
|
/// x and y may alias each other.
|
||||||
/// Asserts that `rma` has enough limbs to store the result. Upper bound is given by `calcGcdNoAliasLimbLen`.
|
/// Asserts that `rma` has enough limbs to store the result. Upper bound is given by `calcGcdNoAliasLimbLen`.
|
||||||
@ -3140,6 +3205,19 @@ pub const Managed = struct {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// r = ⌊√a⌋
|
||||||
|
pub fn sqrt(rma: *Managed, a: *const Managed) !void {
|
||||||
|
const needed_limbs = calcSqrtLimbsBufferLen(a.bitCountAbs());
|
||||||
|
|
||||||
|
const limbs_buffer = try rma.allocator.alloc(Limb, needed_limbs);
|
||||||
|
defer rma.allocator.free(limbs_buffer);
|
||||||
|
|
||||||
|
try rma.ensureCapacity((a.len() - 1) / 2 + 1);
|
||||||
|
var m = rma.toMutable();
|
||||||
|
m.sqrt(a.toConst(), limbs_buffer);
|
||||||
|
rma.setMetadata(m.positive, m.len);
|
||||||
|
}
|
||||||
|
|
||||||
/// r = truncate(Int(signedness, bit_count), a)
|
/// r = truncate(Int(signedness, bit_count), a)
|
||||||
pub fn truncate(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void {
|
pub fn truncate(r: *Managed, a: *const Managed, signedness: Signedness, bit_count: usize) !void {
|
||||||
try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
|
try r.ensureCapacity(calcTwosCompLimbCount(bit_count));
|
||||||
|
|||||||
@ -2622,6 +2622,36 @@ test "big.int pow" {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "big.int sqrt" {
|
||||||
|
var r = try Managed.init(testing.allocator);
|
||||||
|
defer r.deinit();
|
||||||
|
var a = try Managed.init(testing.allocator);
|
||||||
|
defer a.deinit();
|
||||||
|
|
||||||
|
// not aliased
|
||||||
|
try r.set(0);
|
||||||
|
try a.set(25);
|
||||||
|
try r.sqrt(&a);
|
||||||
|
try testing.expectEqual(@as(i32, 5), try r.to(i32));
|
||||||
|
|
||||||
|
// aliased
|
||||||
|
try a.set(25);
|
||||||
|
try a.sqrt(&a);
|
||||||
|
try testing.expectEqual(@as(i32, 5), try a.to(i32));
|
||||||
|
|
||||||
|
// bottom
|
||||||
|
try r.set(0);
|
||||||
|
try a.set(24);
|
||||||
|
try r.sqrt(&a);
|
||||||
|
try testing.expectEqual(@as(i32, 4), try r.to(i32));
|
||||||
|
|
||||||
|
// large number
|
||||||
|
try r.set(0);
|
||||||
|
try a.set(0x1_0000_0000_0000);
|
||||||
|
try r.sqrt(&a);
|
||||||
|
try testing.expectEqual(@as(i32, 0x100_0000), try r.to(i32));
|
||||||
|
}
|
||||||
|
|
||||||
test "big.int regression test for 1 limb overflow with alias" {
|
test "big.int regression test for 1 limb overflow with alias" {
|
||||||
// Note these happen to be two consecutive Fibonacci sequence numbers, the
|
// Note these happen to be two consecutive Fibonacci sequence numbers, the
|
||||||
// first two whose sum exceeds 2**64.
|
// first two whose sum exceeds 2**64.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user