#
# mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
#
# p += b*m
#
# each step looks like:
#	hi,lo = m*b[i]
#	lo += oldhi + carry
#	hi += carry
#	p[i] += lo
#	oldhi = hi
#
# the registers are:
#	hi = DX		- constrained by hardware
#	lo = AX		- constrained by hardware
#	b+n = SI	- can't be BP
#	p+n = DI	- can't be BP
#	i-n = BP
#	m = BX
#	oldhi = CX
#
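# For reference only (not part of the original source): a minimal C
# sketch of the same loop, assuming a 32-bit mpdigit and a 64-bit
# intermediate product; the name mpvecdigmuladd_ref is illustrative.
#
#	#include <stdint.h>
#
#	typedef uint32_t mpdigit;
#
#	int
#	mpvecdigmuladd_ref(mpdigit *b, int n, mpdigit m, mpdigit *p)
#	{
#		int i;
#		mpdigit hi, lo, oldhi;
#		uint64_t t;
#
#		oldhi = 0;
#		for(i = 0; i < n; i++){
#			t = (uint64_t)m * b[i];	/* hi,lo = m*b[i] */
#			hi = (mpdigit)(t >> 32);
#			lo = (mpdigit)t;
#			lo += oldhi;		/* lo += oldhi */
#			if(lo < oldhi)
#				hi++;		/* hi += carry */
#			p[i] += lo;		/* p[i] += lo */
#			if(p[i] < lo)
#				hi++;		/* hi += carry */
#			oldhi = hi;		/* oldhi = hi */
#		}
#		p[n] += oldhi;			/* p[n] += oldhi */
#		return p[n] < oldhi;		/* carry out of p[n] */
#	}
#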
.text

.p2align 2,0x90
.globl mpvecdigmuladd
mpvecdigmuladd:
	# Prelude
	pushl %ebp			# save callee-saved registers on the stack
	pushl %ebx
	pushl %esi
	pushl %edi

	leal 20(%esp), %ebp		# %ebp = FP for now (4 saves + return address = 20 bytes)
	movl 0(%ebp), %esi		# b
	movl 4(%ebp), %ecx		# n
	movl 8(%ebp), %ebx		# m
	movl 12(%ebp), %edi		# p
	movl %ecx, %ebp
	negl %ebp			# BP = -n
	shll $2, %ecx			# CX = n*4 bytes
	addl %ecx, %esi			# SI = b + n
	addl %ecx, %edi			# DI = p + n
	xorl %ecx, %ecx			# oldhi = 0
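
	# BP counts i-n up toward zero, so (%esi,%ebp,4) = b+n + 4*(i-n)
	# addresses b[i], (%edi,%ebp,4) addresses p[i], and the loop ends
	# when BP reaches 0.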
_muladdloop:
	movl (%esi, %ebp, 4), %eax	# lo = b[i]
	mull %ebx			# hi, lo = b[i] * m
	addl %ecx, %eax			# lo += oldhi
	jae _muladdnocarry1		# jae == jnc: skip if no carry out of lo
	incl %edx			# hi += carry
_muladdnocarry1:
	addl %eax, (%edi, %ebp, 4)	# p[i] += lo
	jae _muladdnocarry2		# skip if no carry out of p[i]
	incl %edx			# hi += carry
_muladdnocarry2:
	movl %edx, %ecx			# oldhi = hi
	incl %ebp			# i++
	jnz _muladdloop
	xorl %eax, %eax
	addl %ecx, (%edi, %ebp, 4)	# p[n] += oldhi (BP is now 0, DI = p+n)
	adcl %eax, %eax			# return carry out of p[n]

	# Postlude
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
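
#
# Calling it from C (a sketch, not from the original file): the prelude
# reads the arguments off the stack in cdecl order and the final carry
# comes back in %eax, so a matching declaration and call would look like:
#
#	typedef uint32_t mpdigit;	/* assumes <stdint.h> */
#	extern int mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p);
#
#	mpdigit b[4] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
#	mpdigit p[5] = { 0 };			/* p needs room for n+1 digits */
#	int carry = mpvecdigmuladd(b, 4, 2, p);	/* p += b*2, carry out of p[4] */
#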