*** longobject.c Mon Sep 8 11:22:47 2008
--- longobject33.c Mon Sep 8 10:25:01 2008
***************
*** 16,21 ****
--- 16,26 ----
#define KARATSUBA_CUTOFF 70
#define KARATSUBA_SQUARE_CUTOFF (2 * KARATSUBA_CUTOFF)
+ /* For long division, use the O(N**2) school algorithm unless the
+ * denominator contains more than DIV_LIMIT digits.
+ */
+ const int DIV_LIMIT = KARATSUBA_CUTOFF;
+
/* For exponentiation, use the binary left-to-right algorithm
* unless the exponent contains more than FIVEARY_CUTOFF digits.
* In that case, do 5 bits at a time. The potential drawback is that
***************
*** 1762,1767 ****
--- 1767,1774 ----
static PyObject *long_long(PyObject *v);
static int long_divrem(PyLongObject *, PyLongObject *,
PyLongObject **, PyLongObject **);
+ static PyLongObject * divmod_pos
+ (PyLongObject *, PyLongObject *, PyLongObject **);
/* Long division with remainder, top-level routine */
***************
*** 1800,1806 ****
}
}
else {
! z = x_divrem(a, b, prem);
if (z == NULL)
return -1;
}
--- 1807,1822 ----
}
}
else {
! if ((size_b < 2*DIV_LIMIT) ||
! ((size_b < 4*DIV_LIMIT) &&
! (size_a < 0.897 * size_b + 44.97)) ||
! (size_a > 0.00343 * pow(size_b,2.9)))
! {
! z = x_divrem(a, b, prem);
! }
! else {
! z = divmod_pos(a, b, prem);
! }
if (z == NULL)
return -1;
}
***************
*** 1916,1921 ****
--- 1932,2267 ----
return a;
}
+
+ /* utilities for the long division algorithm */
+ // shift left by shiftby limbs
+ static PyLongObject *
+ _long_int_lshift(PyLongObject *a, Py_ssize_t shiftby)
+ {
+ PyLongObject *z = NULL;
+ Py_ssize_t oldsize, newsize, wordshift;
+
+ assert(shiftby >= 0);
+ wordshift = shiftby;
+ oldsize = Py_SIZE(a);
+ newsize = oldsize + wordshift;
+ z = _PyLong_New(newsize);
+ if (z == NULL)
+ goto lshift_error;
+ bzero(z->ob_digit, wordshift * sizeof(digit));
+ memcpy(z->ob_digit + wordshift, a->ob_digit, oldsize * sizeof(digit));
+ z = long_normalize(z);
+ lshift_error:
+ return z;
+ }
+
+
+ // shift right by shiftby limbs
+ static PyLongObject *
+ _long_int_rshift(PyLongObject *a, Py_ssize_t shiftby)
+ {
+ PyLongObject *z = NULL;
+ Py_ssize_t newsize, wordshift;
+
+ assert(shiftby >= 0);
+ wordshift = shiftby;
+ newsize = Py_SIZE(a) - wordshift;
+ if (newsize <= 0) {
+ z = _PyLong_New(0);
+ return z;
+ }
+ z = _PyLong_New(newsize);
+ if (z == NULL)
+ goto rshift_error;
+ memcpy(z->ob_digit, a->ob_digit + wordshift, newsize * sizeof(digit));
+ z = long_normalize(z);
+ rshift_error:
+ return z;
+ }
+
+ // mask for len limbs starting from start
+ static PyLongObject *
+ _long_int_mask(PyLongObject *a, Py_ssize_t start, Py_ssize_t len)
+ {
+ PyLongObject *z = NULL;
+ Py_ssize_t wordshift, newsize;
+ assert(start >=0);
+ assert(len >= 0);
+ wordshift = start;
+ newsize = MIN(len, (long)Py_SIZE(a) - wordshift);
+ if (newsize < 0)
+ newsize = 0;
+ if (newsize == 0) {
+ z = _PyLong_New(0);
+ return z;
+ }
+ z = _PyLong_New(newsize);
+ if (z == NULL)
+ goto int_mask1_error;
+ memcpy(z->ob_digit, a->ob_digit + wordshift, newsize * sizeof(digit));
+ z = long_normalize(z);
+ int_mask1_error:
+ return z;
+ }
+
+ static int
+ _long_divrem1(PyLongObject *a, PyLongObject *b,
+ PyLongObject **pdiv, PyLongObject **prem)
+ {
+ Py_ssize_t size_a = Py_SIZE(a), size_b = Py_SIZE(b);
+ PyLongObject *z;
+ if (size_a < size_b ||
+ (size_a == size_b &&
+ a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) {
+ /* |a| < |b|. */
+ *pdiv = _PyLong_New(0);
+ if (*pdiv == NULL)
+ return -1;
+ Py_INCREF(a);
+ *prem = (PyLongObject *) a;
+ return 0;
+ }
+ z = x_divrem(a, b, prem);
+ if (z == NULL)
+ return -1;
+ *pdiv = z;
+ return 0;
+ }
+
+ /* For long division a/b with with n = PySIZE(q), n > DIV_LIMIT use the
+ * binary splitting algorithm by Burnikel and Ziegler
+ * http://cr.yp.to/bib/1998/burnikel.ps
+ * n is required to be even.
+ */
+ #define STUB_EXC(a) if ((a) == NULL) return NULL;
+ static PyObject * long_add(PyLongObject *, PyLongObject *);
+ static PyObject * long_sub(PyLongObject *, PyLongObject *);
+ static PyObject * long_mul(PyLongObject *, PyLongObject *);
+ static PyObject * long_div(PyObject *, PyObject *);
+ static int long_compare(PyLongObject *, PyLongObject *);
+ static PyObject * long_rshift(PyLongObject *, PyLongObject *);
+ static PyObject * long_lshift(PyObject *, PyObject *);
+ static PyObject * long_or(PyObject *, PyObject *);
+ static PyObject * long_and(PyObject *, PyObject *);
+ static PyObject * long_lshift(PyObject *, PyObject *);
+ static PyLongObject * div3n2n(PyLongObject *, digit *, PyLongObject *,
+ PyLongObject *, PyLongObject *, Py_ssize_t , PyLongObject **);
+
+ static PyLongObject *
+ div2n1n(PyLongObject *a, PyLongObject *b, Py_ssize_t n, PyLongObject **prem)
+ {
+ PyLongObject *q = NULL, *r;
+ if (n <= DIV_LIMIT || Py_SIZE(a) < n){
+ _long_divrem1(a, b, &q, &r);
+ *prem = r;
+ return q;
+ }
+ PyLongObject *q1, *q2, *t1, *a1, *b1, *b2;
+ Py_ssize_t half_n = n >> 1;
+ STUB_EXC(b1 = _long_int_rshift(b, half_n));
+ STUB_EXC(b2 = _long_int_mask(b, 0, half_n));
+ STUB_EXC(a1 = _long_int_rshift(a, n));
+ STUB_EXC(q1 = div3n2n(a1, a->ob_digit+half_n, b, b1, b2, half_n, &t1));
+ Py_DECREF(a1);
+ STUB_EXC(q2 = div3n2n(t1, a->ob_digit, b, b1, b2, half_n, &r));
+ Py_DECREF(b1);
+ Py_DECREF(b2);
+ Py_DECREF(t1);
+ if(Py_SIZE(q1)) {
+ STUB_EXC(q = _long_int_lshift(q1, half_n));
+ memcpy(q->ob_digit, q2->ob_digit, Py_SIZE(q2) * sizeof(digit));
+ }
+ else {
+ q = q2;
+ Py_INCREF(q2);
+ }
+ Py_DECREF(q1);
+ Py_DECREF(q2);
+ *prem = r;
+ return q;
+ }
+
+ static PyLongObject * k_mul(PyLongObject *, PyLongObject *);
+ static PyLongObject * x_add(PyLongObject *, PyLongObject *);
+ static PyLongObject * x_sub(PyLongObject *, PyLongObject *);
+
+ /* Helper function for div2n1n; not intended to be called directly. */
+ static PyLongObject *
+ div3n2n(PyLongObject *a12, digit *a3, PyLongObject *b,
+ PyLongObject *b1, PyLongObject *b2, Py_ssize_t n, PyLongObject **prem)
+ {
+ PyLongObject *q, *r, *t1, *t2, *t3;
+ STUB_EXC(t1 = _long_int_rshift(a12, n));
+ if (long_compare(t1, b1) == 0){
+ PyLongObject * one;
+ STUB_EXC(one = (PyLongObject*) PyLong_FromLong(1));
+ STUB_EXC(t2 = _long_int_lshift(one, n));
+ STUB_EXC(q = (PyLongObject *) long_sub(t2, one));
+ Py_DECREF(t2);
+ STUB_EXC(t3 = _long_int_lshift(b1, n));
+ Py_DECREF(t1);
+ STUB_EXC(t1 = (PyLongObject *) long_sub(a12, t3));
+ Py_DECREF(t3);
+ STUB_EXC(r = (PyLongObject *) long_add(t1, b1));
+ }
+ else {
+ STUB_EXC(q = div2n1n(a12, b1, n, &r));
+ }
+ Py_DECREF(t1);
+ if(Py_SIZE(r)) {
+ STUB_EXC(t2 = _long_int_lshift(r, n));
+ memcpy(t2->ob_digit, a3, n * sizeof(digit));
+ }
+ else {
+ STUB_EXC(t2 = _PyLong_New(n));
+ memcpy(t2->ob_digit, a3, n * sizeof(digit));
+ }
+
+ STUB_EXC(t3 = (PyLongObject *) k_mul(q, b2));
+ Py_DECREF(r);
+ STUB_EXC(r = x_sub(t2, t3));
+ Py_DECREF(t2);
+ Py_DECREF(t3);
+ while (Py_SIZE(r) < 0) {
+ if (q->ob_digit[0] > 0)
+ q->ob_digit[0]--;
+ else {
+ digit borrow = 1;
+ q->ob_digit[0] = (-1) & MASK;
+ int i;
+ for (i=1; (i < q->ob_size) && borrow; ++i) {
+ borrow = q->ob_digit[i] - borrow;
+ q->ob_digit[i] = borrow & MASK;
+ borrow >>= SHIFT;
+ borrow &= 1;
+ }
+ assert(borrow == 0);
+ }
+ t1 = r; STUB_EXC(r = (PyLongObject *) long_add(r, b));
+ Py_DECREF(t1);
+ }
+ *prem = r;
+ return q;
+ }
+
+ /* To perform long division a/b where a has more than 2*n, where n is
+ * the number of bits of b, split a in chunks of n nits, then call the
+ * div2n1n algorithm. Since n is required to be even in div2n1n,
+ * in case it is not pad a and b with zeros on the right till n is
+ * a multiple of 2**N, where N is the number of times n must be
+ * divided in the div2n1n algorithm.
+ */
+ static PyLongObject *
+ divmod_pos(PyLongObject *a, PyLongObject *b, PyLongObject **prem)
+ {
+ PyLongObject *a0, *a1, *b1, *q, *r, *q_digit, *t0, *t1, *t2;
+ int asign = (Py_SIZE(a) < 0);
+ int bsign = (Py_SIZE(b) < 0);
+ if (asign) Py_SIZE(a) = - Py_SIZE(a);
+ if (bsign) Py_SIZE(b) = - Py_SIZE(b);
+
+ int na = _PyLong_NumBits((PyObject *) a);
+ int n = _PyLong_NumBits((PyObject *) b);
+ // make n a multiple of PyLong_SHIFT
+ int nr = n%PyLong_SHIFT;
+ int pad;
+ STUB_EXC(t0 = (PyLongObject*) PyLong_FromLong(PyLong_SHIFT - nr));
+ if (nr > 0) {
+ pad = 1;
+ n = n + PyLong_SHIFT - n%PyLong_SHIFT;
+ na = na + PyLong_SHIFT - n%PyLong_SHIFT;
+ STUB_EXC(a1 = (PyLongObject *) long_lshift((PyObject *) a,
+ (PyObject *) t0));
+ STUB_EXC(b1 = (PyLongObject *) long_lshift((PyObject *) b,
+ (PyObject *) t0));
+ }
+ else {
+ pad = 0;
+ a1 = a;
+ Py_INCREF(a);
+ b1 = b;
+ Py_INCREF(b);
+ }
+
+ /* estimates the number of times n must be divided by to by div2n1n
+ * before falling back to x_divrem; increase till it can be divided
+ * that number of times
+ */
+ int nab = na/n + 1;
+ int n_S = n/PyLong_SHIFT;
+ PyLongObject *lnn;
+ STUB_EXC(lnn = (PyLongObject*) PyLong_FromLong(n_S/DIV_LIMIT));
+ int nn = _PyLong_NumBits((PyObject *) lnn);
+ int mask_n = (1 << nn) - 1;
+ int n1 = n_S;
+ while(n1 & mask_n)
+ n1++;
+ int shift_n = n1 - n_S;
+ Py_DECREF(lnn);
+ n_S = n1;
+ t1 = a1; STUB_EXC(a1 = _long_int_lshift(a1, shift_n)); Py_DECREF(t1);
+ t1 = b1; STUB_EXC(b1 = _long_int_lshift(b1, shift_n)); Py_DECREF(t1);
+
+ // slit a in chunks sized n_S
+ PyLongObject ** a_digits =
+ (PyLongObject **) calloc(nab, sizeof(PyLongObject *));
+ STUB_EXC(a_digits);
+ STUB_EXC(a0 = (PyLongObject *) _PyLong_Copy(a1));
+
+ int i, top;
+ for(i=0; i < nab; i++) {
+ STUB_EXC(a_digits[i] = _long_int_mask(a0, 0, n_S));
+ t1 = a0; STUB_EXC(a0 = _long_int_rshift(a0, n_S));
+ Py_DECREF(t1);
+ if (Py_SIZE(a0) == 0) {
+ break;
+ }
+ }
+ top = i;
+ Py_DECREF(a0);
+ if (long_compare(a_digits[i], b1) >= 0) {
+ STUB_EXC(r = (PyLongObject*) PyLong_FromLong(0));
+ }
+ else {
+ STUB_EXC(r = (PyLongObject *) _PyLong_Copy(a_digits[i--]));
+ }
+ STUB_EXC(q = (PyLongObject*) PyLong_FromLong(0));
+ while(i >= 0) {
+ STUB_EXC(t1 = _long_int_lshift(r, n_S));
+ STUB_EXC(t2 = (PyLongObject *) long_add(t1, a_digits[i--]));
+ Py_DECREF(t1);
+ Py_DECREF(r);
+ STUB_EXC(q_digit = div2n1n(t2, b1, n_S, &r));
+ Py_DECREF(t2);
+ STUB_EXC(t1 = _long_int_lshift(q, n_S));
+ Py_DECREF(q);
+ STUB_EXC(q = (PyLongObject *) long_add(t1, q_digit));
+ Py_DECREF(t1);
+ Py_DECREF(q_digit);
+ }
+
+ for(i=top; i >= 0; i--) {
+ Py_XDECREF(a_digits[i]);
+ }
+ free(a_digits);
+
+ if (pad) {
+ t1 = r;
+ STUB_EXC(r = (PyLongObject *) long_rshift(r, t0));
+ Py_DECREF(t1);
+ }
+ t1 = r; STUB_EXC(r = _long_int_rshift(r, shift_n)); Py_DECREF(t1);
+
+ if (asign) Py_SIZE(a) = - Py_SIZE(a);
+ if (bsign) Py_SIZE(b) = - Py_SIZE(b);
+ Py_DECREF(a1);
+ Py_DECREF(b1);
+ Py_DECREF(t0);
+ *prem = r;
+ return q;
+ }
+
+
/* Methods */
static void