Improve division performance
Before: test divide_0 ... bench: 4,058 ns/iter (+/- 255) test divide_1 ... bench: 304,507 ns/iter (+/- 28,063) test divide_2 ... bench: 668,293,969 ns/iter (+/- 25,383,239) After: test divide_0 ... bench: 874 ns/iter (+/- 71) test divide_1 ... bench: 16,641 ns/iter (+/- 1,205) test divide_2 ... bench: 1,336,888 ns/iter (+/- 77,450)
This commit is contained in:
parent
08b0022aab
commit
79928b3185
196
src/bigint.rs
196
src/bigint.rs
|
@ -158,6 +158,20 @@ fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -
|
||||||
lo
|
lo
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Divide a two digit numerator by a one digit divisor, returns quotient and remainder:
|
||||||
|
///
|
||||||
|
/// Note: the caller must ensure that both the quotient and remainder will fit into a single digit.
|
||||||
|
/// This is _not_ true for an arbitrary numerator/denominator.
|
||||||
|
///
|
||||||
|
/// (This function also matches what the x86 divide instruction does).
|
||||||
|
#[inline]
|
||||||
|
fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
|
||||||
|
debug_assert!(hi < divisor);
|
||||||
|
|
||||||
|
let lhs = big_digit::to_doublebigdigit(hi, lo);
|
||||||
|
let rhs = divisor as DoubleBigDigit;
|
||||||
|
((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit)
|
||||||
|
}
|
||||||
/// A big unsigned integer type.
|
/// A big unsigned integer type.
|
||||||
///
|
///
|
||||||
/// A `BigUint`-typed value `BigUint { data: vec!(a, b, c) }` represents a number
|
/// A `BigUint`-typed value `BigUint { data: vec!(a, b, c) }` represents a number
|
||||||
|
@ -822,6 +836,18 @@ impl<'a, 'b> Mul<&'b BigUint> for &'a BigUint {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit) {
|
||||||
|
let mut rem = 0;
|
||||||
|
|
||||||
|
for d in a.data.iter_mut().rev() {
|
||||||
|
let (q, r) = div_wide(rem, *d, b);
|
||||||
|
*d = q;
|
||||||
|
rem = r;
|
||||||
|
}
|
||||||
|
|
||||||
|
(a.normalize(), rem)
|
||||||
|
}
|
||||||
|
|
||||||
forward_all_binop_to_ref_ref!(impl Div for BigUint, div);
|
forward_all_binop_to_ref_ref!(impl Div for BigUint, div);
|
||||||
|
|
||||||
impl<'a, 'b> Div<&'b BigUint> for &'a BigUint {
|
impl<'a, 'b> Div<&'b BigUint> for &'a BigUint {
|
||||||
|
@ -917,83 +943,94 @@ impl Integer for BigUint {
|
||||||
if self.is_zero() { return (Zero::zero(), Zero::zero()); }
|
if self.is_zero() { return (Zero::zero(), Zero::zero()); }
|
||||||
if *other == One::one() { return (self.clone(), Zero::zero()); }
|
if *other == One::one() { return (self.clone(), Zero::zero()); }
|
||||||
|
|
||||||
|
/* Required or the q_len calculation below can underflow: */
|
||||||
match self.cmp(other) {
|
match self.cmp(other) {
|
||||||
Less => return (Zero::zero(), self.clone()),
|
Less => return (Zero::zero(), self.clone()),
|
||||||
Equal => return (One::one(), Zero::zero()),
|
Equal => return (One::one(), Zero::zero()),
|
||||||
Greater => {} // Do nothing
|
Greater => {} // Do nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut shift = 0;
|
/*
|
||||||
let mut n = *other.data.last().unwrap();
|
* This algorithm is from Knuth, TAOCP vol 2 section 4.3, algorithm D:
|
||||||
while n < (1 << big_digit::BITS - 2) {
|
*
|
||||||
n <<= 1;
|
* First, normalize the arguments so the highest bit in the highest digit of the divisor is
|
||||||
shift += 1;
|
* set: the main loop uses the highest digit of the divisor for generating guesses, so we
|
||||||
}
|
* want it to be the largest number we can efficiently divide by.
|
||||||
assert!(shift < big_digit::BITS);
|
*/
|
||||||
let (d, m) = div_mod_floor_inner(self << shift, other << shift);
|
let shift = other.data.last().unwrap().leading_zeros() as usize;
|
||||||
return (d, m >> shift);
|
let mut a = self << shift;
|
||||||
|
let b = other << shift;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The algorithm works by incrementally calculating "guesses", q0, for part of the
|
||||||
|
* remainder. Once we have any number q0 such that q0 * b <= a, we can set
|
||||||
|
*
|
||||||
|
* q += q0
|
||||||
|
* a -= q0 * b
|
||||||
|
*
|
||||||
|
* and then iterate until a < b. Then, (q, a) will be our desired quotient and remainder.
|
||||||
|
*
|
||||||
|
* q0, our guess, is calculated by dividing the last few digits of a by the last digit of b
|
||||||
|
* - this should give us a guess that is "close" to the actual quotient, but is possibly
|
||||||
|
* greater than the actual quotient. If q0 * b > a, we simply use iterated subtraction
|
||||||
|
* until we have a guess such that q0 & b <= a.
|
||||||
|
*/
|
||||||
|
|
||||||
fn div_mod_floor_inner(a: BigUint, b: BigUint) -> (BigUint, BigUint) {
|
let bn = *b.data.last().unwrap();
|
||||||
let mut m = a;
|
let q_len = a.data.len() - b.data.len() + 1;
|
||||||
let mut d: BigUint = Zero::zero();
|
let mut q: BigUint = BigUint { data: Vec::with_capacity(q_len) };
|
||||||
let mut n = 1;
|
|
||||||
while m >= b {
|
q.data.extend(repeat(0).take(q_len));
|
||||||
let (d0, d_unit, b_unit) = div_estimate(&m, &b, n);
|
|
||||||
let mut d0 = d0;
|
/*
|
||||||
let mut prod = &b * &d0;
|
* We reuse the same temporary to avoid hitting the allocator in our inner loop - this is
|
||||||
while prod > m {
|
* sized to hold a0 (in the common case; if a particular digit of the quotient is zero a0
|
||||||
// FIXME(#5992): assignment operator overloads
|
* can be bigger).
|
||||||
// d0 -= &d_unit
|
*/
|
||||||
d0 = d0 - &d_unit;
|
let mut tmp: BigUint = BigUint { data: Vec::with_capacity(2) };
|
||||||
// FIXME(#5992): assignment operator overloads
|
|
||||||
// prod -= &b_unit;
|
for j in (0..q_len).rev() {
|
||||||
prod = prod - &b_unit
|
/*
|
||||||
}
|
* When calculating our next guess q0, we don't need to consider the digits below j
|
||||||
if d0.is_zero() {
|
* + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
|
||||||
n = 2;
|
* digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
|
||||||
|
* two numbers will be zero in all digits up to (j + b.data.len() - 1).
|
||||||
|
*/
|
||||||
|
let offset = j + b.data.len() - 1;
|
||||||
|
if offset >= a.data.len() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
n = 1;
|
|
||||||
// FIXME(#5992): assignment operator overloads
|
|
||||||
// d += d0;
|
|
||||||
d = d + d0;
|
|
||||||
// FIXME(#5992): assignment operator overloads
|
|
||||||
// m -= prod;
|
|
||||||
m = m - prod;
|
|
||||||
}
|
|
||||||
return (d, m);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/* just avoiding a heap allocation: */
|
||||||
|
let mut a0 = tmp;
|
||||||
|
a0.data.truncate(0);
|
||||||
|
a0.data.extend(a.data[offset..].iter().cloned());
|
||||||
|
|
||||||
fn div_estimate(a: &BigUint, b: &BigUint, n: usize)
|
/*
|
||||||
-> (BigUint, BigUint, BigUint) {
|
* q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
|
||||||
if a.data.len() < n {
|
* implicitly at the end, when adding and subtracting to a and q. Not only do we
|
||||||
return (Zero::zero(), Zero::zero(), (*a).clone());
|
* save the cost of the shifts, the rest of the arithmetic gets to work with
|
||||||
}
|
* smaller numbers.
|
||||||
|
*/
|
||||||
|
let (mut q0, _) = div_rem_digit(a0, bn);
|
||||||
|
let mut prod = &b * &q0;
|
||||||
|
|
||||||
let an = &a.data[a.data.len() - n ..];
|
while cmp_slice(&prod.data[..], &a.data[j..]) == Greater {
|
||||||
let bn = *b.data.last().unwrap();
|
|
||||||
let mut d = Vec::with_capacity(an.len());
|
|
||||||
let mut carry = 0;
|
|
||||||
for elt in an.iter().rev() {
|
|
||||||
let ai = big_digit::to_doublebigdigit(carry, *elt);
|
|
||||||
let di = ai / (bn as DoubleBigDigit);
|
|
||||||
assert!(di < big_digit::BASE);
|
|
||||||
carry = (ai % (bn as DoubleBigDigit)) as BigDigit;
|
|
||||||
d.push(di as BigDigit)
|
|
||||||
}
|
|
||||||
d.reverse();
|
|
||||||
|
|
||||||
let shift = (a.data.len() - an.len()) - (b.data.len() - 1);
|
|
||||||
if shift == 0 {
|
|
||||||
return (BigUint::new(d), One::one(), (*b).clone());
|
|
||||||
}
|
|
||||||
let one: BigUint = One::one();
|
let one: BigUint = One::one();
|
||||||
return (BigUint::new(d).shl_unit(shift),
|
q0 = q0 - one;
|
||||||
one.shl_unit(shift),
|
prod = prod - &b;
|
||||||
b.shl_unit(shift));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
add2(&mut q.data[j..], &q0.data[..]);
|
||||||
|
sub2(&mut a.data[j..], &prod.data[..]);
|
||||||
|
a = a.normalize();
|
||||||
|
|
||||||
|
tmp = q0;
|
||||||
|
}
|
||||||
|
|
||||||
|
debug_assert!(a < b);
|
||||||
|
|
||||||
|
(q.normalize(), a >> shift)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculates the Greatest Common Divisor (GCD) of the number and `other`.
|
/// Calculates the Greatest Common Divisor (GCD) of the number and `other`.
|
||||||
|
@ -1146,43 +1183,18 @@ fn to_str_radix_reversed(u: &BigUint, radix: u32) -> Vec<u8> {
|
||||||
vec![b'0']
|
vec![b'0']
|
||||||
} else {
|
} else {
|
||||||
let mut res = Vec::new();
|
let mut res = Vec::new();
|
||||||
let mut digits = u.data.to_vec();
|
let mut digits = u.clone();
|
||||||
|
|
||||||
while !digits.is_empty() {
|
while digits != Zero::zero() {
|
||||||
let rem = div_rem_in_place(&mut digits, radix);
|
let (q, r) = div_rem_digit(digits, radix as BigDigit);
|
||||||
res.push(to_digit(rem as u8));
|
res.push(to_digit(r as u8));
|
||||||
|
digits = q;
|
||||||
// If we finished the most significant digit, drop it
|
|
||||||
if let Some(&0) = digits.last() {
|
|
||||||
digits.pop();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn div_rem_in_place(digits: &mut [BigDigit], divisor: BigDigit) -> BigDigit {
|
|
||||||
let mut rem = 0;
|
|
||||||
|
|
||||||
for d in digits.iter_mut().rev() {
|
|
||||||
let (q, r) = full_div_rem(*d, divisor, rem);
|
|
||||||
*d = q;
|
|
||||||
rem = r;
|
|
||||||
}
|
|
||||||
|
|
||||||
rem
|
|
||||||
}
|
|
||||||
|
|
||||||
fn full_div_rem(a: BigDigit, b: BigDigit, borrow: BigDigit) -> (BigDigit, BigDigit) {
|
|
||||||
let lo = a as DoubleBigDigit;
|
|
||||||
let hi = borrow as DoubleBigDigit;
|
|
||||||
|
|
||||||
let lhs = lo | (hi << big_digit::BITS);
|
|
||||||
let rhs = b as DoubleBigDigit;
|
|
||||||
((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn to_digit(b: u8) -> u8 {
|
fn to_digit(b: u8) -> u8 {
|
||||||
match b {
|
match b {
|
||||||
0 ... 9 => b'0' + b,
|
0 ... 9 => b'0' + b,
|
||||||
|
|
Loading…
Reference in New Issue