Blob


/*
 *	mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p -= b*m
 *
 *	Multiplies the n digits b[0..n-1] by the single digit m and
 *	subtracts the product, in place, from the digits p[0..n].
 *	Digits are 32 bits wide.
 *
 *	Returns (in %eax) 1 if the final subtraction from p[n] did not
 *	borrow, -1 if it did.
 *
 *	If n <= 0 no digit of b is read, p[0] is rewritten with its own
 *	value, and 1 is returned.
 *
 *	Target: 32-bit x86, AT&T syntax.  Arguments are read from the
 *	stack; %ebp, %ebx, %esi and %edi are saved and restored;
 *	%eax, %ecx, %edx and the flags are clobbered.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi
 *		hi += carry	(carry out of the addition above)
 *		p[i] -= lo
 *		hi += borrow	(borrow out of the subtraction above)
 *		oldhi = hi
 *
 *	and after the last step:
 *		p[n] -= oldhi
 *
 *	the registers are:
 *		hi = %edx		- constrained by MUL
 *		lo = %eax		- constrained by MUL
 *		b = %esi
 *		p = %edi
 *		i = %ebp
 *		n = %ecx		- counts down to zero
 *		m = %ebx
 *		oldhi = 0(%esp)		- kept on the stack
 *
 */
	.text

	.p2align 2,0x90
	.globl mpvecdigmulsub
mpvecdigmulsub:
	/* Prelude: save the registers this routine uses and must preserve */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	leal	20(%esp), %ebp		/* first argument: past 4 saved regs + return address */
	movl	0(%ebp), %esi		/* b */
	movl	4(%ebp), %ecx		/* n */
	movl	8(%ebp), %ebx		/* m */
	movl	12(%ebp), %edi		/* p */
	xorl	%ebp, %ebp		/* i = 0 */
	pushl	%ebp			/* oldhi = 0, lives at 0(%esp) */

	/*
	 * The loop below tests its counter only after the body has run,
	 * so a non-positive n must bypass it entirely; otherwise the
	 * counter would wrap and the loop would run off both vectors.
	 */
	testl	%ecx, %ecx
	jle	.Lmulsubfinal

.Lmulsubloop:
	movl	(%esi, %ebp, 4), %eax	/* lo = b[i] */
	mull	%ebx			/* hi, lo = b[i] * m */
	addl	0(%esp), %eax		/* lo += oldhi */
	jae	.Lmulsubnocarry1
	incl	%edx			/* hi += carry */
.Lmulsubnocarry1:
	subl	%eax, (%edi, %ebp, 4)	/* p[i] -= lo */
	jae	.Lmulsubnocarry2
	incl	%edx			/* hi += borrow */
.Lmulsubnocarry2:
	movl	%edx, 0(%esp)		/* oldhi = hi */
	incl	%ebp			/* i++ */
	decl	%ecx			/* one digit fewer to go */
	jnz	.Lmulsubloop

.Lmulsubfinal:
	/* here i == n (or 0 when the loop was skipped) */
	popl	%eax			/* oldhi; also releases its stack slot */
	subl	%eax, (%edi, %ebp, 4)	/* p[i] -= oldhi */
	jae	.Lmulsubnoborrow
	movl	$-1, %eax		/* top digit borrowed */
	jmp	.Lmulsubdone

.Lmulsubnoborrow:
	movl	$1, %eax		/* no borrow out of the top digit */

.Lmulsubdone:
	/* Postlude: restore saved registers in reverse order */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret