~ubuntu-branches/ubuntu/intrepid/ecl/intrepid

« back to all changes in this revision

Viewing changes to src/gmp/mpn/alpha/ev5/lshift.asm

  • Committer: Bazaar Package Importer
  • Author(s): Peter Van Eynde
  • Date: 2006-05-17 02:46:26 UTC
  • Revision ID: james.westby@ubuntu.com-20060517024626-lljr08ftv9g9vefl
Tags: upstream-0.9h-20060510
Import upstream version 0.9h-20060510

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
dnl  Alpha EV5 __gmpn_lshift -- Shift a number left.
 
2
 
 
3
dnl  Copyright 1994, 1995, 2000 Free Software Foundation, Inc.
 
4
 
 
5
dnl  This file is part of the GNU MP Library.
 
6
 
 
7
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
 
8
dnl  it under the terms of the GNU Lesser General Public License as published
 
9
dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
 
10
dnl  your option) any later version.
 
11
 
 
12
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 
13
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 
14
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 
15
dnl  License for more details.
 
16
 
 
17
dnl  You should have received a copy of the GNU Lesser General Public License
 
18
dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 
19
dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 
20
dnl  MA 02111-1307, USA.
 
21
 
 
22
include(`../config.m4')
 
23
 
 
24
dnl  INPUT PARAMETERS
 
25
dnl  res_ptr    r16
 
26
dnl  s1_ptr     r17
 
27
dnl  size       r18
 
28
dnl  cnt        r19
 
29
 
 
30
dnl  This code runs at 3.25 cycles/limb on the EV5.
 
31
 
 
32
ASM_START()
 
33
PROLOGUE(mpn_lshift)
C mpn_lshift: shift the `size'-limb number at s1_ptr left by `cnt' bits and
C store the `size' result limbs at res_ptr.  The bits shifted out of the most
C significant limb are returned in r0.  Limbs are handled from the highest
C address downwards.
C Register roles: r16 = result pointer, r17 = source pointer, r18 = limbs left
C to load, r19 = cnt, r20 = -cnt (complementary right-shift count), r24 = high
C part (limb << cnt) waiting to be merged into the next lower result limb.
C NOTE(review): the code loads the top limb unconditionally and uses -cnt as a
C right-shift count, so it presumably requires size >= 1 and 1 <= cnt <= 63 --
C confirm against the mpn_lshift contract.
 
34
        s8addq  r18,r17,r17     C make r17 point at end of s1
 
35
        ldq     r4,-8(r17)      C load first limb
 
36
        subq    r31,r19,r20     C r20 = 0 - cnt (r31 reads as zero); Alpha shifts use only the low 6 count bits, so this acts as 64 - cnt
 
37
        s8addq  r18,r16,r16     C make r16 point at end of RES
 
38
        subq    r18,1,r18       C r18 = size - 1
 
39
        and     r18,4-1,r28     C number of limbs in first loop
 
40
        srl     r4,r20,r0       C compute function result
 
41
 
 
42
        beq     r28,$L0         C skip $Loop0 when size - 1 is a multiple of 4
 
43
        subq    r18,r28,r18     C r18 = limbs left for the 4-way unrolled code (a multiple of 4)
 
44
 
 
45
        ALIGN(8)
 
46
$Loop0: ldq     r3,-16(r17)     C one limb per iteration: load the next lower limb
 
47
        subq    r16,8,r16       C step result pointer down one limb
 
48
        sll     r4,r19,r5       C r5 = current limb << cnt
 
49
        subq    r17,8,r17       C step source pointer down one limb
 
50
        subq    r28,1,r28       C one fewer limb for this loop
 
51
        srl     r3,r20,r6       C r6 = bits of the lower limb that move into this result limb
 
52
        bis     r3,r3,r4        C r4 = r3: the lower limb becomes the current limb
 
53
        bis     r5,r6,r8        C merge both parts into one result limb
 
54
        stq     r8,0(r16)       C store it
 
55
        bne     r28,$Loop0
 
56
 
 
57
$L0:    sll     r4,r19,r24      C r24 = high part of the next result limb
 
58
        beq     r18,$Lend       C nothing left to load: only r24 remains to be stored
 
59
C warm up phase 1
 
60
        ldq     r1,-16(r17)     C load the next four limbs into r1..r4
 
61
        subq    r18,4,r18
 
62
        ldq     r2,-24(r17)
 
63
        ldq     r3,-32(r17)
 
64
        ldq     r4,-40(r17)
 
65
        beq     r18,$Lend1      C those were the last four limbs: finish without further loads
 
66
C warm up phase 2
 
67
        srl     r1,r20,r7       C start shifting the first group of four ...
 
68
        sll     r1,r19,r21
 
69
        srl     r2,r20,r8
 
70
        ldq     r1,-48(r17)     C ... while loading the second group into r1..r4
 
71
        sll     r2,r19,r22
 
72
        ldq     r2,-56(r17)
 
73
        srl     r3,r20,r5
 
74
        bis     r7,r24,r7       C r7 = first result limb of the group (complete)
 
75
        sll     r3,r19,r23
 
76
        bis     r8,r21,r8       C r8 = second result limb of the group (complete)
 
77
        srl     r4,r20,r6
 
78
        ldq     r3,-64(r17)
 
79
        sll     r4,r19,r24      C r24 = high part carried into the following group
 
80
        ldq     r4,-72(r17)
 
81
        subq    r18,4,r18
 
82
        beq     r18,$Lend2      C second group was the last one: skip the main loop
 
83
        ALIGN(16)
 
84
C main loop
 
85
$Loop:  stq     r7,-8(r16)      C each iteration stores four result limbs and loads four new source limbs
 
86
        bis     r5,r22,r5       C complete third result limb of the previous group
 
87
        stq     r8,-16(r16)
 
88
        bis     r6,r23,r6       C complete fourth result limb of the previous group
 
89
 
 
90
        srl     r1,r20,r7
 
91
        subq    r18,4,r18
 
92
        sll     r1,r19,r21
 
93
        unop    C ldq   r31,-96(r17)
 
94
 
 
95
        srl     r2,r20,r8
 
96
        ldq     r1,-80(r17)
 
97
        sll     r2,r19,r22
 
98
        ldq     r2,-88(r17)
 
99
 
 
100
        stq     r5,-24(r16)
 
101
        bis     r7,r24,r7
 
102
        stq     r6,-32(r16)
 
103
        bis     r8,r21,r8
 
104
 
 
105
        srl     r3,r20,r5
 
106
        unop    C ldq   r31,-96(r17)
 
107
        sll     r3,r19,r23
 
108
        subq    r16,32,r16      C result pointer down four limbs
 
109
 
 
110
        srl     r4,r20,r6
 
111
        ldq     r3,-96(r17)
 
112
        sll     r4,r19,r24
 
113
        ldq     r4,-104(r17)
 
114
 
 
115
        subq    r17,32,r17      C source pointer down four limbs
 
116
        bne     r18,$Loop
 
117
C cool down phase 2/1 (r7,r8 complete; r5,r6 still need merging; r1..r4 hold the last four limbs)
 
118
$Lend2: stq     r7,-8(r16)
 
119
        bis     r5,r22,r5
 
120
        stq     r8,-16(r16)
 
121
        bis     r6,r23,r6
 
122
        srl     r1,r20,r7
 
123
        sll     r1,r19,r21
 
124
        srl     r2,r20,r8
 
125
        sll     r2,r19,r22
 
126
        stq     r5,-24(r16)
 
127
        bis     r7,r24,r7
 
128
        stq     r6,-32(r16)
 
129
        bis     r8,r21,r8
 
130
        srl     r3,r20,r5
 
131
        sll     r3,r19,r23
 
132
        srl     r4,r20,r6
 
133
        sll     r4,r19,r24      C r24 = last loaded limb << cnt
 
134
C cool down phase 2/2
 
135
        stq     r7,-40(r16)
 
136
        bis     r5,r22,r5
 
137
        stq     r8,-48(r16)
 
138
        bis     r6,r23,r6
 
139
        stq     r5,-56(r16)
 
140
        stq     r6,-64(r16)
 
141
C cool down phase 2/3
 
142
        stq     r24,-72(r16)    C final store: last loaded limb << cnt
 
143
        ret     r31,(r26),1     C return through r26; result is in r0
 
144
 
 
145
C cool down phase 1/1 (r1..r4 hold the last four limbs, r24 the pending high part)
 
146
$Lend1: srl     r1,r20,r7
 
147
        sll     r1,r19,r21
 
148
        srl     r2,r20,r8
 
149
        sll     r2,r19,r22
 
150
        srl     r3,r20,r5
 
151
        bis     r7,r24,r7
 
152
        sll     r3,r19,r23
 
153
        bis     r8,r21,r8
 
154
        srl     r4,r20,r6
 
155
        sll     r4,r19,r24      C r24 = last loaded limb << cnt
 
156
C cool down phase 1/2
 
157
        stq     r7,-8(r16)
 
158
        bis     r5,r22,r5
 
159
        stq     r8,-16(r16)
 
160
        bis     r6,r23,r6
 
161
        stq     r5,-24(r16)
 
162
        stq     r6,-32(r16)
 
163
        stq     r24,-40(r16)    C final store: last loaded limb << cnt
 
164
        ret     r31,(r26),1     C return through r26; result is in r0
 
165
 
 
166
$Lend:  stq     r24,-8(r16)     C no unrolled work: store the pending limb << cnt
 
167
        ret     r31,(r26),1     C return through r26; result is in r0
 
168
EPILOGUE(mpn_lshift)
 
169
ASM_END()