arrow_buffer/bigint/
div.rs1pub fn div_rem<const N: usize>(numerator: &[u64; N], divisor: &[u64; N]) -> ([u64; N], [u64; N]) {
30 let numerator_bits = bits(numerator);
31 let divisor_bits = bits(divisor);
32 assert_ne!(divisor_bits, 0, "division by zero");
33
34 if numerator_bits < divisor_bits {
35 return ([0; N], *numerator);
36 }
37
38 if divisor_bits <= 64 {
39 return div_rem_small(numerator, divisor[0]);
40 }
41
42 let numerator_words = (numerator_bits + 63) / 64;
43 let divisor_words = (divisor_bits + 63) / 64;
44 let n = divisor_words;
45 let m = numerator_words - divisor_words;
46
47 div_rem_knuth(numerator, divisor, n, m)
48}
49
50fn bits(arr: &[u64]) -> usize {
52 for (idx, v) in arr.iter().enumerate().rev() {
53 if *v > 0 {
54 return 64 - v.leading_zeros() as usize + 64 * idx;
55 }
56 }
57 0
58}
59
60fn div_rem_small<const N: usize>(numerator: &[u64; N], divisor: u64) -> ([u64; N], [u64; N]) {
62 let mut rem = 0u64;
63 let mut numerator = *numerator;
64 numerator.iter_mut().rev().for_each(|d| {
65 let (q, r) = div_rem_word(rem, *d, divisor);
66 *d = q;
67 rem = r;
68 });
69
70 let mut rem_padded = [0; N];
71 rem_padded[0] = rem;
72 (numerator, rem_padded)
73}
74
75fn div_rem_knuth<const N: usize>(
84 numerator: &[u64; N],
85 divisor: &[u64; N],
86 n: usize,
87 m: usize,
88) -> ([u64; N], [u64; N]) {
89 assert!(n + m <= N);
90
91 let shift = divisor[n - 1].leading_zeros();
109 let divisor = shl_word(divisor, shift);
111 let mut numerator = full_shl(numerator, shift);
113
114 let b0 = divisor[n - 1];
116 let b1 = divisor[n - 2];
117
118 let mut q = [0; N];
119
120 for j in (0..=m).rev() {
121 let a0 = numerator[j + n];
122 let a1 = numerator[j + n - 1];
123
124 let mut q_hat = if a0 < b0 {
125 let (mut q_hat, mut r_hat) = div_rem_word(a0, a1, b0);
127
128 let a2 = numerator[j + n - 2];
137 loop {
138 let r = u128::from(q_hat) * u128::from(b1);
139 let (lo, hi) = (r as u64, (r >> 64) as u64);
140 if (hi, lo) <= (r_hat, a2) {
141 break;
142 }
143
144 q_hat -= 1;
145 let (new_r_hat, overflow) = r_hat.overflowing_add(b0);
146 r_hat = new_r_hat;
147
148 if overflow {
149 break;
150 }
151 }
152 q_hat
153 } else {
154 u64::MAX
155 };
156
157 let q_hat_v = full_mul_u64(&divisor, q_hat);
161 let c = sub_assign(&mut numerator[j..], &q_hat_v[..n + 1]);
162
163 if c {
165 q_hat -= 1;
167
168 let c = add_assign(&mut numerator[j..], &divisor[..n]);
170 numerator[j + n] = numerator[j + n].wrapping_add(u64::from(c));
171 }
172
173 q[j] = q_hat;
175 }
176
177 let remainder = full_shr(&numerator, shift);
179 (q, remainder)
180}
181
182fn div_rem_word(hi: u64, lo: u64, divisor: u64) -> (u64, u64) {
187 debug_assert!(hi < divisor);
188 debug_assert_ne!(divisor, 0);
189
190 #[cfg(all(target_arch = "x86_64", not(miri)))]
193 unsafe {
194 let mut quot = lo;
195 let mut rem = hi;
196 std::arch::asm!(
197 "div {divisor}",
198 divisor = in(reg) divisor,
199 inout("rax") quot,
200 inout("rdx") rem,
201 options(pure, nomem, nostack)
202 );
203 (quot, rem)
204 }
205 #[cfg(any(not(target_arch = "x86_64"), miri))]
206 {
207 let x = (u128::from(hi) << 64) + u128::from(lo);
208 let y = u128::from(divisor);
209 ((x / y) as u64, (x % y) as u64)
210 }
211}
212
213fn add_assign(a: &mut [u64], b: &[u64]) -> bool {
215 binop_slice(a, b, u64::overflowing_add)
216}
217
218fn sub_assign(a: &mut [u64], b: &[u64]) -> bool {
220 binop_slice(a, b, u64::overflowing_sub)
221}
222
223fn binop_slice(a: &mut [u64], b: &[u64], binop: impl Fn(u64, u64) -> (u64, bool) + Copy) -> bool {
225 let mut c = false;
226 a.iter_mut().zip(b.iter()).for_each(|(x, y)| {
227 let (res1, overflow1) = y.overflowing_add(u64::from(c));
228 let (res2, overflow2) = binop(*x, res1);
229 *x = res2;
230 c = overflow1 || overflow2;
231 });
232 c
233}
234
235fn full_mul_u64<const N: usize>(a: &[u64; N], b: u64) -> ArrayPlusOne<u64, N> {
237 let mut carry = 0;
238 let mut out = [0; N];
239 out.iter_mut().zip(a).for_each(|(o, v)| {
240 let r = *v as u128 * b as u128 + carry as u128;
241 *o = r as u64;
242 carry = (r >> 64) as u64;
243 });
244 ArrayPlusOne(out, carry)
245}
246
247fn shl_word<const N: usize>(v: &[u64; N], shift: u32) -> [u64; N] {
249 full_shl(v, shift).0
250}
251
252fn full_shl<const N: usize>(v: &[u64; N], shift: u32) -> ArrayPlusOne<u64, N> {
254 debug_assert!(shift < 64);
255 if shift == 0 {
256 return ArrayPlusOne(*v, 0);
257 }
258 let mut out = [0u64; N];
259 out[0] = v[0] << shift;
260 for i in 1..N {
261 out[i] = (v[i - 1] >> (64 - shift)) | (v[i] << shift)
262 }
263 let carry = v[N - 1] >> (64 - shift);
264 ArrayPlusOne(out, carry)
265}
266
267fn full_shr<const N: usize>(a: &ArrayPlusOne<u64, N>, shift: u32) -> [u64; N] {
269 debug_assert!(shift < 64);
270 if shift == 0 {
271 return a.0;
272 }
273 let mut out = [0; N];
274 for i in 0..N - 1 {
275 out[i] = (a[i] >> shift) | (a[i + 1] << (64 - shift))
276 }
277 out[N - 1] = a[N - 1] >> shift;
278 out
279}
280
281#[repr(C)]
285struct ArrayPlusOne<T, const N: usize>([T; N], T);
286
287impl<T, const N: usize> std::ops::Deref for ArrayPlusOne<T, N> {
288 type Target = [T];
289
290 #[inline]
291 fn deref(&self) -> &Self::Target {
292 let x = self as *const Self;
293 unsafe { std::slice::from_raw_parts(x as *const T, N + 1) }
294 }
295}
296
297impl<T, const N: usize> std::ops::DerefMut for ArrayPlusOne<T, N> {
298 fn deref_mut(&mut self) -> &mut Self::Target {
299 let x = self as *mut Self;
300 unsafe { std::slice::from_raw_parts_mut(x as *mut T, N + 1) }
301 }
302}