3
# ====================================================================
4
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5
# project. The module is, however, dual licensed under OpenSSL and
6
# CRYPTOGAMS licenses depending on where you obtain it. For further
7
# details see http://www.openssl.org/~appro/cryptogams/.
8
# ====================================================================
10
# I let hardware handle unaligned input(*), except on page boundaries
11
# (see below for details). Otherwise straightforward implementation
12
# with X vector in register bank. The module is big-endian [which is
13
# not a big deal as there are no little-endian targets left around].
15
# (*) this means that this module is inappropriate for PPC403? Does
16
# anybody know if pre-POWER3 can sustain unaligned loads?
19
# ----------------------------------
20
# PPC970,gcc-4.0.0 +76% +59%
21
# Power6,xlc-7 +68% +33%
25
if ($flavour =~ /64/) {
32
} elsif ($flavour =~ /32/) {
39
} else { die "nonsense $flavour"; }
41
# Derive the directory part of the script path (including its trailing
# separator) so that ppc-xlate.pl can be looked up relative to this script.
# $1 is consulted only when the match succeeded: after a failed match the
# capture variables keep whatever an earlier successful match left behind,
# so an unconditional "$dir=$1" could pick up a stale or undefined value
# when $0 carries no directory component. In that case $dir is empty and
# the lookup falls back to the current directory.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
42
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
43
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
44
die "can't locate ppc-xlate.pl";
46
# Redirect STDOUT into a pipe feeding the ppc-xlate.pl translator, passing
# the flavour and the next command-line argument (shifted off here) through
# to it. The call is parenthesised and checked with low-precedence "or":
# with "||" the test applied to the (always true) command string rather
# than to open()'s return value, so a failed open was silently ignored.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";
67
@V=($A,$B,$C,$D,$E,$T);
68
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
69
"r24","r25","r26","r27","r28","r29","r30","r31");
72
my ($i,$a,$b,$c,$d,$e,$f)=@_;
74
$code.=<<___ if ($i==0);
75
lwz @X[$i],`$i*4`($inp)
77
$code.=<<___ if ($i<15);
78
lwz @X[$j],`$j*4`($inp)
89
$code.=<<___ if ($i>=15);
92
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
95
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
100
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
102
rotlwi @X[$j%16],@X[$j%16],1
107
my ($i,$a,$b,$c,$d,$e,$f)=@_;
109
$code.=<<___ if ($i<79);
112
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
115
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
119
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
121
rotlwi @X[$j%16],@X[$j%16],1
123
$code.=<<___ if ($i==79);
141
my ($i,$a,$b,$c,$d,$e,$f)=@_;
146
xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
149
xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
153
xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
156
rotlwi @X[$j%16],@X[$j%16],1
165
.globl .sha1_block_data_order
167
.sha1_block_data_order:
168
$STU $sp,-$FRAME($sp)
170
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
171
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
172
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
173
$PUSH r18,`$FRAME-$SIZE_T*14`($sp)
174
$PUSH r19,`$FRAME-$SIZE_T*13`($sp)
175
$PUSH r20,`$FRAME-$SIZE_T*12`($sp)
176
$PUSH r21,`$FRAME-$SIZE_T*11`($sp)
177
$PUSH r22,`$FRAME-$SIZE_T*10`($sp)
178
$PUSH r23,`$FRAME-$SIZE_T*9`($sp)
179
$PUSH r24,`$FRAME-$SIZE_T*8`($sp)
180
$PUSH r25,`$FRAME-$SIZE_T*7`($sp)
181
$PUSH r26,`$FRAME-$SIZE_T*6`($sp)
182
$PUSH r27,`$FRAME-$SIZE_T*5`($sp)
183
$PUSH r28,`$FRAME-$SIZE_T*4`($sp)
184
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
185
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
186
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
187
$PUSH r0,`$FRAME+$LRSAVE`($sp)
197
bl Lsha1_block_private
200
; PowerPC specification allows an implementation to be ill-behaved
201
; upon unaligned access which crosses page boundary. "Better safe
202
; than sorry" principle makes me treat it specially. But I don't
203
; look for particular offending word, but rather for 64-byte input
204
; block which crosses the boundary. Once found that block is aligned
205
; and hashed separately...
209
andi. $t1,$t1,4095 ; distance to closest page boundary
210
srwi. $t1,$t1,6 ; t1/=64
213
ble- Laligned ; didn't cross the page boundary
216
bl Lsha1_block_private
220
addi r20,$sp,$LOCALS ; spot within the frame
234
$PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
236
addi $inp,$sp,$LOCALS
238
bl Lsha1_block_private
239
$POP $inp,`$FRAME-$SIZE_T*18`($sp)
244
$POP r0,`$FRAME+$LRSAVE`($sp)
245
$POP r15,`$FRAME-$SIZE_T*17`($sp)
246
$POP r16,`$FRAME-$SIZE_T*16`($sp)
247
$POP r17,`$FRAME-$SIZE_T*15`($sp)
248
$POP r18,`$FRAME-$SIZE_T*14`($sp)
249
$POP r19,`$FRAME-$SIZE_T*13`($sp)
250
$POP r20,`$FRAME-$SIZE_T*12`($sp)
251
$POP r21,`$FRAME-$SIZE_T*11`($sp)
252
$POP r22,`$FRAME-$SIZE_T*10`($sp)
253
$POP r23,`$FRAME-$SIZE_T*9`($sp)
254
$POP r24,`$FRAME-$SIZE_T*8`($sp)
255
$POP r25,`$FRAME-$SIZE_T*7`($sp)
256
$POP r26,`$FRAME-$SIZE_T*6`($sp)
257
$POP r27,`$FRAME-$SIZE_T*5`($sp)
258
$POP r28,`$FRAME-$SIZE_T*4`($sp)
259
$POP r29,`$FRAME-$SIZE_T*3`($sp)
260
$POP r30,`$FRAME-$SIZE_T*2`($sp)
261
$POP r31,`$FRAME-$SIZE_T*1`($sp)
266
.byte 0,12,4,1,0x80,18,3,0
270
# This is a private block function, which uses a tailored calling
271
# interface, namely upon entry SHA_CTX is pre-loaded into the given
272
# registers and the counter register contains the number of chunks to
278
$code.=<<___; # load K_00_19
282
# Rounds 0..19: emit one BODY_00_19 round per iteration, then rotate @V
# right by one (its last element moves to the front) so that the next
# round receives the working variables in shifted positions. $i is the
# shared round counter; the later round loops continue from its value.
$i = 0;
while ($i < 20) {
    &BODY_00_19($i, @V);
    my $tail = pop(@V);
    unshift(@V, $tail);
    $i++;
}
283
$code.=<<___; # load K_20_39
287
# Rounds 20..39: $i is not reset here, it carries on from wherever the
# preceding code left it. Each iteration emits one BODY_20_39 round and
# then rotates @V right by one (last element moves to the front).
while ($i < 40) {
    &BODY_20_39($i, @V);
    my $tail = pop(@V);
    unshift(@V, $tail);
    $i++;
}
288
$code.=<<___; # load K_40_59
292
# Rounds 40..59: $i is not reset here, it carries on from wherever the
# preceding code left it. Each iteration emits one BODY_40_59 round and
# then rotates @V right by one (last element moves to the front).
while ($i < 60) {
    &BODY_40_59($i, @V);
    my $tail = pop(@V);
    unshift(@V, $tail);
    $i++;
}
293
$code.=<<___; # load K_60_79
297
# Rounds 60..79: these rounds are generated with BODY_20_39 again, with $i
# continuing from its current value. Each iteration emits one round and
# then rotates @V right by one (last element moves to the front).
while ($i < 80) {
    &BODY_20_39($i, @V);
    my $tail = pop(@V);
    unshift(@V, $tail);
    $i++;
}
314
addi $inp,$inp,`16*4`
315
bdnz- Lsha1_block_private
318
.byte 0,12,0x14,0,0,0,0,0
321
.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
324
# Post-process the generated assembly text: every `...` span in $code is
# replaced by the result of evaluating its contents as a Perl expression,
# which is how offsets written as `$FRAME-$SIZE_T*17` or `16*4` end up as
# literal numbers in the output.
$code =~ s{\`([^\`]*)\`}{eval $1}gem;