diff --git a/README.md b/README.md index 6a9fa4e580..1c7d7e5258 100644 --- a/README.md +++ b/README.md @@ -125,17 +125,20 @@ libc. Create demo games using Zig. ##### POSIX - * gcc >= 5.0.0 or clang >= 3.6.0 * cmake >= 2.8.5 + * gcc >= 5.0.0 or clang >= 3.6.0 * LLVM, Clang, LLD libraries == 5.x, compiled with the same gcc or clang version above ##### Windows + * cmake >= 2.8.5 * Microsoft Visual Studio 2015 * LLVM, Clang, LLD libraries == 5.x, compiled with the same MSVC version above #### Instructions +##### POSIX + If you have gcc or clang installed, you can find out what `ZIG_LIBC_LIB_DIR`, `ZIG_LIBC_STATIC_LIB_DIR`, and `ZIG_LIBC_INCLUDE_DIR` should be set to (example below). diff --git a/src/bigint.cpp b/src/bigint.cpp index f01436d232..3e47a11900 100644 --- a/src/bigint.cpp +++ b/src/bigint.cpp @@ -12,6 +12,9 @@ #include "os.hpp" #include "softfloat.hpp" +#include +#include + static void bigint_normalize(BigInt *dest) { const uint64_t *digits = bigint_ptr(dest); @@ -539,7 +542,7 @@ void bigint_add(BigInt *dest, const BigInt *op1, const BigInt *op2) { dest->data.digits[i] = x; i += 1; - if (!found_digit) + if (!found_digit || i >= bigger_op->digit_count) break; } assert(overflow == 0); @@ -670,19 +673,409 @@ void bigint_mul_wrap(BigInt *dest, const BigInt *op1, const BigInt *op2, size_t bigint_truncate(dest, &unwrapped, bit_count, is_signed); } +enum ZeroBehavior { + /// \brief The returned value is undefined. + ZB_Undefined, + /// \brief The returned value is numeric_limits::max() + ZB_Max, + /// \brief The returned value is numeric_limits::digits + ZB_Width +}; + +template struct LeadingZerosCounter { + static std::size_t count(T Val, ZeroBehavior) { + if (!Val) + return std::numeric_limits::digits; + + // Bisection method. + std::size_t ZeroBits = 0; + for (T Shift = std::numeric_limits::digits >> 1; Shift; Shift >>= 1) { + T Tmp = Val >> Shift; + if (Tmp) + Val = Tmp; + else + ZeroBits |= Shift; + } + return ZeroBits; + } +}; + +#if __GNUC__ >= 4 || defined(_MSC_VER) +template struct LeadingZerosCounter { + static std::size_t count(T Val, ZeroBehavior ZB) { + if (ZB != ZB_Undefined && Val == 0) + return 32; + +#if defined(_MSC_VER) + unsigned long Index; + _BitScanReverse(&Index, Val); + return Index ^ 31; +#else + return __builtin_clz(Val); +#endif + } +}; + +#if !defined(_MSC_VER) || defined(_M_X64) +template struct LeadingZerosCounter { + static std::size_t count(T Val, ZeroBehavior ZB) { + if (ZB != ZB_Undefined && Val == 0) + return 64; + +#if defined(_MSC_VER) + unsigned long Index; + _BitScanReverse64(&Index, Val); + return Index ^ 63; +#else + return __builtin_clzll(Val); +#endif + } +}; +#endif +#endif + +/// \brief Count number of 0's from the most significant bit to the least +/// stopping at the first 1. +/// +/// Only unsigned integral types are allowed. +/// +/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are +/// valid arguments. +template +std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { + static_assert(std::numeric_limits::is_integer && + !std::numeric_limits::is_signed, + "Only unsigned integral types are allowed."); + return LeadingZerosCounter::count(Val, ZB); +} + +/// Make a 64-bit integer from a high / low pair of 32-bit integers. +constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { + return ((uint64_t)High << 32) | (uint64_t)Low; +} + +/// Return the high 32 bits of a 64 bit value. +constexpr inline uint32_t Hi_32(uint64_t Value) { + return static_cast(Value >> 32); +} + +/// Return the low 32 bits of a 64 bit value. +constexpr inline uint32_t Lo_32(uint64_t Value) { + return static_cast(Value); +} + +/// Implementation of Knuth's Algorithm D (Division of nonnegative integers) +/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The +/// variables here have the same names as in the algorithm. Comments explain +/// the algorithm and any deviation from it. +static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, + unsigned m, unsigned n) +{ + assert(u && "Must provide dividend"); + assert(v && "Must provide divisor"); + assert(q && "Must provide quotient"); + assert(u != v && u != q && v != q && "Must use different memory"); + assert(n>1 && "n must be > 1"); + + // b denotes the base of the number system. In our case b is 2^32. + const uint64_t b = uint64_t(1) << 32; + + // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of + // u and v by d. Note that we have taken Knuth's advice here to use a power + // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of + // 2 allows us to shift instead of multiply and it is easy to determine the + // shift amount from the leading zeros. We are basically normalizing the u + // and v so that its high bits are shifted to the top of v's range without + // overflow. Note that this can require an extra word in u so that u must + // be of length m+n+1. + unsigned shift = countLeadingZeros(v[n-1]); + uint32_t v_carry = 0; + uint32_t u_carry = 0; + if (shift) { + for (unsigned i = 0; i < m+n; ++i) { + uint32_t u_tmp = u[i] >> (32 - shift); + u[i] = (u[i] << shift) | u_carry; + u_carry = u_tmp; + } + for (unsigned i = 0; i < n; ++i) { + uint32_t v_tmp = v[i] >> (32 - shift); + v[i] = (v[i] << shift) | v_carry; + v_carry = v_tmp; + } + } + u[m+n] = u_carry; + + // D2. [Initialize j.] Set j to m. This is the loop counter over the places. + int j = m; + do { + // D3. [Calculate q'.]. + // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') + // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') + // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease + // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test + // on v[n-2] determines at high speed most of the cases in which the trial + // value qp is one too large, and it eliminates all cases where qp is two + // too large. + uint64_t dividend = Make_64(u[j+n], u[j+n-1]); + uint64_t qp = dividend / v[n-1]; + uint64_t rp = dividend % v[n-1]; + if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { + qp--; + rp += v[n-1]; + if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) + qp--; + } + + // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with + // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation + // consists of a simple multiplication by a one-place number, combined with + // a subtraction. + // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the + // true value plus b**(n+1), namely as the b's complement of + // the true value, and a "borrow" to the left should be remembered. + int64_t borrow = 0; + for (unsigned i = 0; i < n; ++i) { + uint64_t p = uint64_t(qp) * uint64_t(v[i]); + int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); + u[j+i] = Lo_32(subres); + borrow = Hi_32(p) - Hi_32(subres); + } + bool isNeg = u[j+n] < borrow; + u[j+n] -= Lo_32(borrow); + + // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was + // negative, go to step D6; otherwise go on to step D7. + q[j] = Lo_32(qp); + if (isNeg) { + // D6. [Add back]. The probability that this step is necessary is very + // small, on the order of only 2/b. Make sure that test data accounts for + // this possibility. Decrease q[j] by 1 + q[j]--; + // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). + // A carry will occur to the left of u[j+n], and it should be ignored + // since it cancels with the borrow that occurred in D4. + bool carry = false; + for (unsigned i = 0; i < n; i++) { + uint32_t limit = std::min(u[j+i],v[i]); + u[j+i] += v[i] + carry; + carry = u[j+i] < limit || (carry && u[j+i] == limit); + } + u[j+n] += carry; + } + + // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. + } while (--j >= 0); + + // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired + // remainder may be obtained by dividing u[...] by d. If r is non-null we + // compute the remainder (urem uses this). + if (r) { + // The value d is expressed by the "shift" value above since we avoided + // multiplication by d by using a shift left. So, all we have to do is + // shift right here. + if (shift) { + uint32_t carry = 0; + for (int i = n-1; i >= 0; i--) { + r[i] = (u[i] >> shift) | carry; + carry = u[i] << (32 - shift); + } + } else { + for (int i = n-1; i >= 0; i--) { + r[i] = u[i]; + } + } + } +} + +// Implementation ported from LLVM/lib/Support/APInt.cpp +static void bigint_unsigned_division(const BigInt *op1, const BigInt *op2, BigInt *Quotient, BigInt *Remainder) { + Cmp cmp = bigint_cmp(op1, op2); + if (cmp == CmpLT) { + if (Quotient != nullptr) { + bigint_init_unsigned(Quotient, 0); + } + if (Remainder != nullptr) { + bigint_init_bigint(Remainder, op1); + } + return; + } + if (cmp == CmpEQ) { + if (Quotient != nullptr) { + bigint_init_unsigned(Quotient, 1); + } + if (Remainder != nullptr) { + bigint_init_unsigned(Remainder, 0); + } + return; + } + + const uint64_t *LHS = bigint_ptr(op1); + const uint64_t *RHS = bigint_ptr(op2); + unsigned lhsWords = op1->digit_count; + unsigned rhsWords = op2->digit_count; + + // First, compose the values into an array of 32-bit words instead of + // 64-bit words. This is a necessity of both the "short division" algorithm + // and the Knuth "classical algorithm" which requires there to be native + // operations for +, -, and * on an m bit value with an m*2 bit result. We + // can't use 64-bit operands here because we don't have native results of + // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't + // work on large-endian machines. + unsigned n = rhsWords * 2; + unsigned m = (lhsWords * 2) - n; + + // Allocate space for the temporary values we need either on the stack, if + // it will fit, or on the heap if it won't. + uint32_t SPACE[128]; + uint32_t *U = nullptr; + uint32_t *V = nullptr; + uint32_t *Q = nullptr; + uint32_t *R = nullptr; + if ((Remainder?4:3)*n+2*m+1 <= 128) { + U = &SPACE[0]; + V = &SPACE[m+n+1]; + Q = &SPACE[(m+n+1) + n]; + if (Remainder) + R = &SPACE[(m+n+1) + n + (m+n)]; + } else { + U = new uint32_t[m + n + 1]; + V = new uint32_t[n]; + Q = new uint32_t[m+n]; + if (Remainder) + R = new uint32_t[n]; + } + + // Initialize the dividend + memset(U, 0, (m+n+1)*sizeof(uint32_t)); + for (unsigned i = 0; i < lhsWords; ++i) { + uint64_t tmp = LHS[i]; + U[i * 2] = Lo_32(tmp); + U[i * 2 + 1] = Hi_32(tmp); + } + U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. + + // Initialize the divisor + memset(V, 0, (n)*sizeof(uint32_t)); + for (unsigned i = 0; i < rhsWords; ++i) { + uint64_t tmp = RHS[i]; + V[i * 2] = Lo_32(tmp); + V[i * 2 + 1] = Hi_32(tmp); + } + + // initialize the quotient and remainder + memset(Q, 0, (m+n) * sizeof(uint32_t)); + if (Remainder) + memset(R, 0, n * sizeof(uint32_t)); + + // Now, adjust m and n for the Knuth division. n is the number of words in + // the divisor. m is the number of words by which the dividend exceeds the + // divisor (i.e. m+n is the length of the dividend). These sizes must not + // contain any zero words or the Knuth algorithm fails. + for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { + n--; + m++; + } + for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) + m--; + + // If we're left with only a single word for the divisor, Knuth doesn't work + // so we implement the short division algorithm here. This is much simpler + // and faster because we are certain that we can divide a 64-bit quantity + // by a 32-bit quantity at hardware speed and short division is simply a + // series of such operations. This is just like doing short division but we + // are using base 2^32 instead of base 10. + assert(n != 0 && "Divide by zero?"); + if (n == 1) { + uint32_t divisor = V[0]; + uint32_t remainder = 0; + for (int i = m; i >= 0; i--) { + uint64_t partial_dividend = Make_64(remainder, U[i]); + if (partial_dividend == 0) { + Q[i] = 0; + remainder = 0; + } else if (partial_dividend < divisor) { + Q[i] = 0; + remainder = Lo_32(partial_dividend); + } else if (partial_dividend == divisor) { + Q[i] = 1; + remainder = 0; + } else { + Q[i] = Lo_32(partial_dividend / divisor); + remainder = Lo_32(partial_dividend - (Q[i] * divisor)); + } + } + if (R) + R[0] = remainder; + } else { + // Now we're ready to invoke the Knuth classical divide algorithm. In this + // case n > 1. + KnuthDiv(U, V, Q, R, m, n); + } + + // If the caller wants the quotient + if (Quotient) { + Quotient->digit_count = lhsWords; + Quotient->data.digits = allocate(lhsWords); + Quotient->is_negative = false; + for (size_t i = 0; i < lhsWords; i += 1) { + Quotient->data.digits[i] = Make_64(Q[i*2+1], Q[i*2]); + } + } + + // If the caller wants the remainder + if (Remainder) { + Remainder->digit_count = rhsWords; + Remainder->data.digits = allocate(rhsWords); + Remainder->is_negative = false; + for (size_t i = 0; i < rhsWords; i += 1) { + Remainder->data.digits[i] = Make_64(R[i*2+1], R[i*2]); + } + } +} + void bigint_div_trunc(BigInt *dest, const BigInt *op1, const BigInt *op2) { assert(op2->digit_count != 0); // division by zero if (op1->digit_count == 0) { bigint_init_unsigned(dest, 0); return; } - if (op1->digit_count != 1 || op2->digit_count != 1) { - zig_panic("TODO bigint div_trunc with >1 digits"); - } const uint64_t *op1_digits = bigint_ptr(op1); const uint64_t *op2_digits = bigint_ptr(op2); - dest->data.digit = op1_digits[0] / op2_digits[0]; - dest->digit_count = 1; + if (op1->digit_count == 1 && op2->digit_count == 1) { + dest->data.digit = op1_digits[0] / op2_digits[0]; + dest->digit_count = 1; + dest->is_negative = op1->is_negative != op2->is_negative; + bigint_normalize(dest); + return; + } + if (op2->digit_count == 1 && op2_digits[0] == 1) { + // X / 1 == X + bigint_init_bigint(dest, op1); + dest->is_negative = op1->is_negative != op2->is_negative; + bigint_normalize(dest); + return; + } + + const BigInt *op1_positive; + BigInt op1_positive_data; + if (op1->is_negative) { + bigint_negate(&op1_positive_data, op1); + op1_positive = &op1_positive_data; + } else { + op1_positive = op1; + } + + const BigInt *op2_positive; + BigInt op2_positive_data; + if (op2->is_negative) { + bigint_negate(&op2_positive_data, op2); + op2_positive = &op2_positive_data; + } else { + op2_positive = op2; + } + + bigint_unsigned_division(op1_positive, op2_positive, dest, nullptr); dest->is_negative = op1->is_negative != op2->is_negative; bigint_normalize(dest); } @@ -714,6 +1107,14 @@ void bigint_rem(BigInt *dest, const BigInt *op1, const BigInt *op2) { } const uint64_t *op1_digits = bigint_ptr(op1); const uint64_t *op2_digits = bigint_ptr(op2); + + if (op1->digit_count == 1 && op2->digit_count == 1) { + dest->data.digit = op1_digits[0] % op2_digits[0]; + dest->digit_count = 1; + dest->is_negative = op1->is_negative; + bigint_normalize(dest); + return; + } if (op2->digit_count == 2 && op2_digits[0] == 0 && op2_digits[1] == 1) { // special case this divisor bigint_init_unsigned(dest, op1_digits[0]); @@ -721,11 +1122,32 @@ void bigint_rem(BigInt *dest, const BigInt *op1, const BigInt *op2) { bigint_normalize(dest); return; } - if (op1->digit_count != 1 || op2->digit_count != 1) { - zig_panic("TODO bigint rem with >1 digits"); + + if (op2->digit_count == 1 && op2_digits[0] == 1) { + // X % 1 == 0 + bigint_init_unsigned(dest, 0); + return; } - dest->data.digit = op1_digits[0] % op2_digits[0]; - dest->digit_count = 1; + + const BigInt *op1_positive; + BigInt op1_positive_data; + if (op1->is_negative) { + bigint_negate(&op1_positive_data, op1); + op1_positive = &op1_positive_data; + } else { + op1_positive = op1; + } + + const BigInt *op2_positive; + BigInt op2_positive_data; + if (op2->is_negative) { + bigint_negate(&op2_positive_data, op2); + op2_positive = &op2_positive_data; + } else { + op2_positive = op2; + } + + bigint_unsigned_division(op1_positive, op2_positive, nullptr, dest); dest->is_negative = op1->is_negative; bigint_normalize(dest); } diff --git a/test/cases/math.zig b/test/cases/math.zig index b4e0e4cfd6..9b949ffd16 100644 --- a/test/cases/math.zig +++ b/test/cases/math.zig @@ -26,6 +26,28 @@ fn testDivision() { assert(divTrunc(i32, -5, 3) == -1); assert(divTrunc(f32, 5.0, 3.0) == 1.0); assert(divTrunc(f32, -5.0, 3.0) == -1.0); + + comptime { + assert( + 1194735857077236777412821811143690633098347576 % + 508740759824825164163191790951174292733114988 == + 177254337427586449086438229241342047632117600); + assert(@rem(-1194735857077236777412821811143690633098347576, + 508740759824825164163191790951174292733114988) == + -177254337427586449086438229241342047632117600); + assert(1194735857077236777412821811143690633098347576 / + 508740759824825164163191790951174292733114988 == + 2); + assert(@divTrunc(-1194735857077236777412821811143690633098347576, + 508740759824825164163191790951174292733114988) == + -2); + assert(@divTrunc(1194735857077236777412821811143690633098347576, + -508740759824825164163191790951174292733114988) == + -2); + assert(@divTrunc(-1194735857077236777412821811143690633098347576, + -508740759824825164163191790951174292733114988) == + 2); + } } fn div(comptime T: type, a: T, b: T) -> T { return a / b;