/* 386 assembly, AT&T (GNU as) syntax */


/*
 *	mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p += b*m
 *
 *	b is read as n 32-bit digits.  p must have room for n+1 digits:
 *	the last high word is added into p[n].  The carry out of p[n]
 *	(0 or 1) is left in AX on return.
 *
 *	n == 0 skips the multiply loop: zero is added to p[0] and AX is 0.
 *	n is assumed to be non-negative.
 *
 *	Arguments are read from the stack (b at 4(%esp) on entry, then
 *	n, m, p).  BP, BX, SI and DI are saved and restored; AX, CX, DX
 *	and the flags are clobbered.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi
 *		hi += carry
 *		p[i] += lo
 *		hi += carry
 *		oldhi = hi
 *
 *	hi cannot wrap: m*b[i] + oldhi + p[i] is at most 2^64 - 1.
 *
 *	the registers are:
 *		hi = DX		- constrained by hardware
 *		lo = AX		- constrained by hardware
 *		b+n = SI	- can't be BP
 *		p+n = DI	- can't be BP
 *		i-n = BP
 *		m = BX
 *		oldhi = CX
 *
 */
	.text

	.globl	_mpvecdigmuladd
_mpvecdigmuladd:
	/* Prelude */
	pushl	%ebp			/* save on stack */
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* 4 saved registers + return address = 20 bytes below the args */
	leal	20(%esp), %ebp		/* %ebp = FP for now */
	movl	0(%ebp), %esi		/* b */
	movl	4(%ebp), %ecx		/* n */
	movl	8(%ebp), %ebx		/* m */
	movl	12(%ebp), %edi		/* p */
	movl	%ecx, %ebp
	negl	%ebp			/* BP = -n, counts up towards 0 */
	shll	$2, %ecx		/* n digits -> n*4 bytes */
	addl	%ecx, %esi		/* SI = b + n */
	addl	%ecx, %edi		/* DI = p + n */
	xorl	%ecx, %ecx		/* oldhi = 0 */

	/*
	 * The loop below is bottom-tested, so it must not be entered
	 * with BP == 0: it would run until the counter wrapped around.
	 */
	testl	%ebp, %ebp
	jz	4f			/* n == 0: nothing to multiply */
1:
	movl	(%esi, %ebp, 4), %eax	/* lo = b[i] */
	mull	%ebx			/* hi, lo = b[i] * m */
	addl	%ecx, %eax		/* lo += oldhi */
	jnc	2f
	incl	%edx			/* hi += carry */
2:
	addl	%eax, (%edi, %ebp, 4)	/* p[i] += lo */
	jnc	3f
	incl	%edx			/* hi += carry */
3:
	movl	%edx, %ecx		/* oldhi = hi */
	incl	%ebp			/* i++ */
	jnz	1b
4:
	/* here BP == 0, so (%edi, %ebp, 4) addresses p[n] */
	xorl	%eax, %eax
	addl	%ecx, (%edi, %ebp, 4)	/* p[n] += oldhi */
	adcl	%eax, %eax		/* return carry out of p[n] */

	/* Postlude */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret