Blame


/*
 *	int mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p -= b*m
 *
 *	Syntax:	GNU as, AT&T operand order (op src, dst).
 *	Target:	32-bit x86, arguments passed on the stack, result in %eax.
 *
 *	b points at n 32-bit digits; p must have room for n+1 digits,
 *	because the high word left over from the last product is
 *	subtracted from p[n].
 *
 *	Returns 1 if that final subtraction from p[n] did not borrow,
 *	-1 if it did.
 *
 *	If n <= 0 no digit is multiplied: the routine only performs
 *	p[0] -= 0 and returns 1.
 *
 *	each step looks like:
 *		hi,lo = m*b[i]
 *		lo += oldhi;	hi += carry
 *		p[i] -= lo;	hi += borrow
 *		oldhi = hi
 *
 *	hi cannot overflow: m*b[i] + oldhi <= 2^64 - 2^32, so whenever
 *	hi is already 2^32-1 after the add, lo is 0 and the subtract
 *	cannot borrow.
 *
 *	the registers are:
 *		hi = DX		- constrained by hardware (MUL)
 *		lo = AX		- constrained by hardware (MUL)
 *		b = SI		- advanced one digit per step
 *		p = DI		- advanced one digit per step
 *		n = CX		- digits remaining
 *		m = BX
 *		oldhi = BP
 *
 *	Clobbers AX, CX, DX and the flags.  BX, SI, DI and BP are
 *	saved and restored.
 */
	.text

/*
 * Callee-saved registers are saved with real pushes.  Nothing is ever
 * stored below %esp: the 32-bit ABI gives no guarantee that memory
 * below the stack pointer survives (a signal handler may run on it),
 * so "-4(%esp)"-style scratch slots are not safe.
 */

	.p2align 2,0x90
	.globl	mpvecdigmulsub
	.type	mpvecdigmulsub, @function
mpvecdigmulsub:
	/* Prelude */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* return address + 4 saved registers = 20 bytes below first arg */
	movl	20(%esp), %esi		/* b */
	movl	24(%esp), %ecx		/* n */
	movl	28(%esp), %ebx		/* m */
	movl	32(%esp), %edi		/* p */
	xorl	%ebp, %ebp		/* oldhi = 0 */

	testl	%ecx, %ecx
	jle	.Lmulsubtail		/* n <= 0 (signed): skip the digit loop */

.Lmulsubloop:
	movl	(%esi), %eax		/* lo = b[i] */
	mull	%ebx			/* hi, lo = b[i] * m */
	addl	%ebp, %eax		/* lo += oldhi */
	adcl	$0, %edx		/* hi += carry */
	subl	%eax, (%edi)		/* p[i] -= lo */
	adcl	$0, %edx		/* hi += borrow */
	movl	%edx, %ebp		/* oldhi = hi */
	addl	$4, %esi		/* next digit of b */
	addl	$4, %edi		/* next digit of p */
	decl	%ecx
	jnz	.Lmulsubloop

.Lmulsubtail:
	/* here DI = &p[n] (or &p[0] when n <= 0) and BP = last hi */
	subl	%ebp, (%edi)		/* p[n] -= oldhi */
	movl	$1, %eax		/* movl leaves CF from the subl intact */
	jae	.Lmulsubdone		/* no borrow: return 1 */
	movl	$-1, %eax		/* borrow out of p[n]: return -1 */

.Lmulsubdone:
	/* Postlude */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	.size	mpvecdigmulsub, .-mpvecdigmulsub