Blame


1 b3f61791 2004-03-21 devnull #include "os.h"
2 b3f61791 2004-03-21 devnull #include <mp.h>
3 b3f61791 2004-03-21 devnull #include "dat.h"
4 b3f61791 2004-03-21 devnull
5 cbeb0b26 2006-04-01 devnull /* */
6 cbeb0b26 2006-04-01 devnull /* from knuth's 1969 seminumberical algorithms, pp 233-235 and pp 258-260 */
7 cbeb0b26 2006-04-01 devnull /* */
8 cbeb0b26 2006-04-01 devnull /* mpvecmul is an assembly language routine that performs the inner */
9 cbeb0b26 2006-04-01 devnull /* loop. */
10 cbeb0b26 2006-04-01 devnull /* */
11 cbeb0b26 2006-04-01 devnull /* the karatsuba trade off is set empiricly by measuring the algs on */
12 cbeb0b26 2006-04-01 devnull /* a 400 MHz Pentium II. */
13 cbeb0b26 2006-04-01 devnull /* */
14 b3f61791 2004-03-21 devnull
15 cbeb0b26 2006-04-01 devnull /* karatsuba like (see knuth pg 258) */
16 cbeb0b26 2006-04-01 devnull /* prereq: p is already zeroed */
17 b3f61791 2004-03-21 devnull static void
18 b3f61791 2004-03-21 devnull mpkaratsuba(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *p)
19 b3f61791 2004-03-21 devnull {
20 b3f61791 2004-03-21 devnull mpdigit *t, *u0, *u1, *v0, *v1, *u0v0, *u1v1, *res, *diffprod;
21 b3f61791 2004-03-21 devnull int u0len, u1len, v0len, v1len, reslen;
22 b3f61791 2004-03-21 devnull int sign, n;
23 b3f61791 2004-03-21 devnull
24 cbeb0b26 2006-04-01 devnull /* divide each piece in half */
25 b3f61791 2004-03-21 devnull n = alen/2;
26 b3f61791 2004-03-21 devnull if(alen&1)
27 b3f61791 2004-03-21 devnull n++;
28 b3f61791 2004-03-21 devnull u0len = n;
29 b3f61791 2004-03-21 devnull u1len = alen-n;
30 b3f61791 2004-03-21 devnull if(blen > n){
31 b3f61791 2004-03-21 devnull v0len = n;
32 b3f61791 2004-03-21 devnull v1len = blen-n;
33 b3f61791 2004-03-21 devnull } else {
34 b3f61791 2004-03-21 devnull v0len = blen;
35 b3f61791 2004-03-21 devnull v1len = 0;
36 b3f61791 2004-03-21 devnull }
37 b3f61791 2004-03-21 devnull u0 = a;
38 b3f61791 2004-03-21 devnull u1 = a + u0len;
39 b3f61791 2004-03-21 devnull v0 = b;
40 b3f61791 2004-03-21 devnull v1 = b + v0len;
41 b3f61791 2004-03-21 devnull
42 cbeb0b26 2006-04-01 devnull /* room for the partial products */
43 b3f61791 2004-03-21 devnull t = mallocz(Dbytes*5*(2*n+1), 1);
44 b3f61791 2004-03-21 devnull if(t == nil)
45 b3f61791 2004-03-21 devnull sysfatal("mpkaratsuba: %r");
46 b3f61791 2004-03-21 devnull u0v0 = t;
47 b3f61791 2004-03-21 devnull u1v1 = t + (2*n+1);
48 b3f61791 2004-03-21 devnull diffprod = t + 2*(2*n+1);
49 b3f61791 2004-03-21 devnull res = t + 3*(2*n+1);
50 b3f61791 2004-03-21 devnull reslen = 4*n+1;
51 b3f61791 2004-03-21 devnull
52 cbeb0b26 2006-04-01 devnull /* t[0] = (u1-u0) */
53 b3f61791 2004-03-21 devnull sign = 1;
54 b3f61791 2004-03-21 devnull if(mpveccmp(u1, u1len, u0, u0len) < 0){
55 b3f61791 2004-03-21 devnull sign = -1;
56 b3f61791 2004-03-21 devnull mpvecsub(u0, u0len, u1, u1len, u0v0);
57 b3f61791 2004-03-21 devnull } else
58 b3f61791 2004-03-21 devnull mpvecsub(u1, u1len, u0, u1len, u0v0);
59 b3f61791 2004-03-21 devnull
60 cbeb0b26 2006-04-01 devnull /* t[1] = (v0-v1) */
61 b3f61791 2004-03-21 devnull if(mpveccmp(v0, v0len, v1, v1len) < 0){
62 b3f61791 2004-03-21 devnull sign *= -1;
63 b3f61791 2004-03-21 devnull mpvecsub(v1, v1len, v0, v1len, u1v1);
64 b3f61791 2004-03-21 devnull } else
65 b3f61791 2004-03-21 devnull mpvecsub(v0, v0len, v1, v1len, u1v1);
66 b3f61791 2004-03-21 devnull
67 cbeb0b26 2006-04-01 devnull /* t[4:5] = (u1-u0)*(v0-v1) */
68 b3f61791 2004-03-21 devnull mpvecmul(u0v0, u0len, u1v1, v0len, diffprod);
69 b3f61791 2004-03-21 devnull
70 cbeb0b26 2006-04-01 devnull /* t[0:1] = u1*v1 */
71 b3f61791 2004-03-21 devnull memset(t, 0, 2*(2*n+1)*Dbytes);
72 b3f61791 2004-03-21 devnull if(v1len > 0)
73 b3f61791 2004-03-21 devnull mpvecmul(u1, u1len, v1, v1len, u1v1);
74 b3f61791 2004-03-21 devnull
75 cbeb0b26 2006-04-01 devnull /* t[2:3] = u0v0 */
76 b3f61791 2004-03-21 devnull mpvecmul(u0, u0len, v0, v0len, u0v0);
77 b3f61791 2004-03-21 devnull
78 cbeb0b26 2006-04-01 devnull /* res = u0*v0<<n + u0*v0 */
79 b3f61791 2004-03-21 devnull mpvecadd(res, reslen, u0v0, u0len+v0len, res);
80 b3f61791 2004-03-21 devnull mpvecadd(res+n, reslen-n, u0v0, u0len+v0len, res+n);
81 b3f61791 2004-03-21 devnull
82 cbeb0b26 2006-04-01 devnull /* res += u1*v1<<n + u1*v1<<2*n */
83 b3f61791 2004-03-21 devnull if(v1len > 0){
84 b3f61791 2004-03-21 devnull mpvecadd(res+n, reslen-n, u1v1, u1len+v1len, res+n);
85 b3f61791 2004-03-21 devnull mpvecadd(res+2*n, reslen-2*n, u1v1, u1len+v1len, res+2*n);
86 b3f61791 2004-03-21 devnull }
87 b3f61791 2004-03-21 devnull
88 cbeb0b26 2006-04-01 devnull /* res += (u1-u0)*(v0-v1)<<n */
89 b3f61791 2004-03-21 devnull if(sign < 0)
90 b3f61791 2004-03-21 devnull mpvecsub(res+n, reslen-n, diffprod, u0len+v0len, res+n);
91 b3f61791 2004-03-21 devnull else
92 b3f61791 2004-03-21 devnull mpvecadd(res+n, reslen-n, diffprod, u0len+v0len, res+n);
93 b3f61791 2004-03-21 devnull memmove(p, res, (alen+blen)*Dbytes);
94 b3f61791 2004-03-21 devnull
95 b3f61791 2004-03-21 devnull free(t);
96 b3f61791 2004-03-21 devnull }
97 b3f61791 2004-03-21 devnull
98 b3f61791 2004-03-21 devnull #define KARATSUBAMIN 32
99 b3f61791 2004-03-21 devnull
100 b3f61791 2004-03-21 devnull void
101 b3f61791 2004-03-21 devnull mpvecmul(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *p)
102 b3f61791 2004-03-21 devnull {
103 b3f61791 2004-03-21 devnull int i;
104 b3f61791 2004-03-21 devnull mpdigit d;
105 b3f61791 2004-03-21 devnull mpdigit *t;
106 b3f61791 2004-03-21 devnull
107 cbeb0b26 2006-04-01 devnull /* both mpvecdigmuladd and karatsuba are fastest when a is the longer vector */
108 b3f61791 2004-03-21 devnull if(alen < blen){
109 b3f61791 2004-03-21 devnull i = alen;
110 b3f61791 2004-03-21 devnull alen = blen;
111 b3f61791 2004-03-21 devnull blen = i;
112 b3f61791 2004-03-21 devnull t = a;
113 b3f61791 2004-03-21 devnull a = b;
114 b3f61791 2004-03-21 devnull b = t;
115 b3f61791 2004-03-21 devnull }
116 b3f61791 2004-03-21 devnull if(blen == 0){
117 b3f61791 2004-03-21 devnull memset(p, 0, Dbytes*(alen+blen));
118 b3f61791 2004-03-21 devnull return;
119 b3f61791 2004-03-21 devnull }
120 b3f61791 2004-03-21 devnull
121 b3f61791 2004-03-21 devnull if(alen >= KARATSUBAMIN && blen > 1){
122 cbeb0b26 2006-04-01 devnull /* O(n^1.585) */
123 b3f61791 2004-03-21 devnull mpkaratsuba(a, alen, b, blen, p);
124 b3f61791 2004-03-21 devnull } else {
125 cbeb0b26 2006-04-01 devnull /* O(n^2) */
126 b3f61791 2004-03-21 devnull for(i = 0; i < blen; i++){
127 b3f61791 2004-03-21 devnull d = b[i];
128 b3f61791 2004-03-21 devnull if(d != 0)
129 b3f61791 2004-03-21 devnull mpvecdigmuladd(a, alen, d, &p[i]);
130 b3f61791 2004-03-21 devnull }
131 b3f61791 2004-03-21 devnull }
132 b3f61791 2004-03-21 devnull }
133 b3f61791 2004-03-21 devnull
134 b3f61791 2004-03-21 devnull void
135 b3f61791 2004-03-21 devnull mpmul(mpint *b1, mpint *b2, mpint *prod)
136 b3f61791 2004-03-21 devnull {
137 b3f61791 2004-03-21 devnull mpint *oprod;
138 b3f61791 2004-03-21 devnull
139 b3f61791 2004-03-21 devnull oprod = nil;
140 b3f61791 2004-03-21 devnull if(prod == b1 || prod == b2){
141 b3f61791 2004-03-21 devnull oprod = prod;
142 b3f61791 2004-03-21 devnull prod = mpnew(0);
143 b3f61791 2004-03-21 devnull }
144 b3f61791 2004-03-21 devnull
145 b3f61791 2004-03-21 devnull prod->top = 0;
146 b3f61791 2004-03-21 devnull mpbits(prod, (b1->top+b2->top+1)*Dbits);
147 b3f61791 2004-03-21 devnull mpvecmul(b1->p, b1->top, b2->p, b2->top, prod->p);
148 b3f61791 2004-03-21 devnull prod->top = b1->top+b2->top+1;
149 b3f61791 2004-03-21 devnull prod->sign = b1->sign*b2->sign;
150 b3f61791 2004-03-21 devnull mpnorm(prod);
151 b3f61791 2004-03-21 devnull
152 b3f61791 2004-03-21 devnull if(oprod != nil){
153 b3f61791 2004-03-21 devnull mpassign(prod, oprod);
154 b3f61791 2004-03-21 devnull mpfree(prod);
155 b3f61791 2004-03-21 devnull }
156 b3f61791 2004-03-21 devnull }