Blob


/*
 *	int mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p -= b*m
 *
 *	Syntax: GNU as, AT&T operand order.  Target: 32-bit x86; all four
 *	arguments are read from the stack above the return address.
 *	mpdigit is handled as a 32-bit unsigned digit.
 *
 *	b holds n digits.  p must hold at least n+1 digits: after the
 *	loop the final high word is subtracted from p[n].
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi
 *		hi += carry	(from the add above)
 *		p[i] -= lo
 *		hi += borrow	(from the subtract above)
 *		oldhi = hi
 *
 *	after the last step:
 *		p[n] -= oldhi
 *
 *	returns -1 if that final subtraction borrowed, 1 otherwise.
 *	if n <= 0 the loop is skipped: p[0] -= 0 and the result is 1.
 *
 *	the registers are:
 *		hi = DX		- constrained by hardware (MUL)
 *		lo = AX		- constrained by hardware (MUL)
 *		b = SI
 *		p = DI
 *		i = BP
 *		n = CX		- counts remaining digits down to 0
 *		m = BX
 *		oldhi = (%esp)	- one pushed stack slot
 */
	.text

/*
 * Callee-saved registers and the oldhi temporary are kept in properly
 * pushed stack slots.  Nothing is stored below %esp: on 32-bit x86
 * there is no protected area under the stack pointer, so anything
 * written there can be overwritten asynchronously (e.g. by a signal
 * handler running on the same stack).
 */

	.p2align 2,0x90
	.globl	mpvecdigmulsub
	.type	mpvecdigmulsub, @function
mpvecdigmulsub:
	/* Prologue: save the callee-saved registers we are going to use */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* 4 saved registers + return address = 20 bytes before the args */
	movl	20(%esp), %esi		/* b */
	movl	24(%esp), %ecx		/* n */
	movl	28(%esp), %ebx		/* m */
	movl	32(%esp), %edi		/* p */
	xorl	%ebp, %ebp		/* i = 0 */
	pushl	%ebp			/* oldhi = 0, now lives at (%esp) */

	testl	%ecx, %ecx
	jle	.Lmulsub_last		/* n <= 0: nothing to multiply */

.Lmulsub_loop:
	movl	(%esi,%ebp,4), %eax	/* lo = b[i] */
	mull	%ebx			/* hi:lo = b[i] * m */
	addl	(%esp), %eax		/* lo += oldhi */
	adcl	$0, %edx		/* hi += carry */
	subl	%eax, (%edi,%ebp,4)	/* p[i] -= lo */
	adcl	$0, %edx		/* hi += borrow */
	movl	%edx, (%esp)		/* oldhi = hi */
	incl	%ebp			/* i++ */
	decl	%ecx			/* one digit fewer to go */
	jnz	.Lmulsub_loop

.Lmulsub_last:
	popl	%edx			/* edx = oldhi; releases its slot */
	movl	$1, %eax		/* result if the subtract below does not borrow */
	subl	%edx, (%edi,%ebp,4)	/* p[n] -= oldhi */
	jae	.Lmulsub_done		/* no borrow: return 1 */
	movl	$-1, %eax		/* borrow out of p[n]: return -1 */

.Lmulsub_done:
	/* Epilogue: restore in reverse order of the prologue pushes */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	.size	mpvecdigmulsub, .-mpvecdigmulsub