3
# ====================================================================
4
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5
# project. The module is, however, dual licensed under OpenSSL and
6
# CRYPTOGAMS licenses depending on where you obtain it. For further
7
# details see http://www.openssl.org/~appro/cryptogams/.
8
# ====================================================================
10
# SHA256/512 block procedure for PA-RISC.
14
# SHA256 performance is >75% better than gcc 3.2 generated code on
15
# PA-7100LC. Compared to code generated by vendor compiler this
16
# implementation is almost 70% faster in 64-bit build, but delivers
17
# virtually same performance in 32-bit build on PA-8600.
19
# SHA512 performance is >2.9x better than gcc 3.2 generated code on
20
# PA-7100LC, PA-RISC 1.1 processor. The implementation then detects if the
21
# code is executed on PA-RISC 2.0 processor and switches to 64-bit
22
# code path delivering adequate performance even in "blended" 32-bit
23
# build. Though 64-bit code is not any faster than code generated by
24
# vendor compiler on PA-8600...
26
# Special thanks to polarhome.com for providing HP-UX account.
30
# Redirect everything printed from here on into the file named by $output.
# The three-argument form of open() treats $output purely as a file name, so
# no character in it can be taken for an open mode, and the result is checked
# so that an unwritable path aborts the run instead of going unnoticed.
# When no output name was supplied STDOUT is left untouched; this also avoids
# handing undef to three-argument open(), which would open an anonymous
# temporary file.
if (defined($output) && length($output)) {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}
32
if ($flavour =~ /64/) {
52
if ($output =~ /512/) {
53
$func="sha512_block_data_order";
65
$func="sha256_block_data_order";
78
# Stack frame size and the offset of the local-variable area, both derived
# from $SIZE_T, $SZ and $FRAME_MARKER.
$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
79
# [+ argument transfer]
80
$XOFF=16*$SZ+32; # local variables
82
$XOFF+=$FRAME_MARKER; # distance between %sp and local variables
84
# Registers holding the three incoming arguments; as the trailing notes say,
# each one is later reused as a temporary.
$ctx="%r26"; # zapped by $a0
85
$inp="%r25"; # zapped by $a1
86
$num="%r24"; # zapped by $t0
94
# The eight working variables A..H, each bound to a register name. @V is the
# list passed to the round generators, which rotate it after every round.
@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
96
# Sixteen registers for the message words plus $inp as a 17th entry; the
# unaligned-input path loads into @X[16] and the data-alignment loop reads it
# as @X[$i+1] when $i is 15.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
97
"%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);
100
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
102
_ror $e,$Sigma1[0],$a0
104
_ror $e,$Sigma1[1],$a1
108
_ror $a1,`$Sigma1[2]-$Sigma1[1]`,$a1
109
or $t0,$t1,$t1 ; Ch(e,f,g)
111
xor $a0,$a1,$a1 ; Sigma1(e)
113
_ror $a,$Sigma0[0],$a0
116
_ror $a,$Sigma0[1],$a1
120
_ror $a1,`$Sigma0[2]-$Sigma0[1]`,$a1
123
xor $a0,$a1,$a1 ; Sigma0(a)
125
xor $t1,$t0,$t0 ; Maj(a,b,c)
126
`"$LDM $SZ($Tbl),$t1" if ($i<15)`
134
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
137
_ror @X[($i+1)%16],$sigma0[0],$a0
138
_ror @X[($i+1)%16],$sigma0[1],$a1
139
addl @X[($i+9)%16],@X[$i],@X[$i]
140
_ror @X[($i+14)%16],$sigma1[0],$t0
141
_ror @X[($i+14)%16],$sigma1[1],$t1
143
_shr @X[($i+1)%16],$sigma0[2],$a1
145
_shr @X[($i+14)%16],$sigma1[2],$t1
146
xor $a1,$a0,$a0 ; sigma0(X[(i+1)&0x0f])
147
xor $t1,$t0,$t0 ; sigma1(X[(i+14)&0x0f])
149
addl $a0,@X[$i],@X[$i]
150
addl $t0,@X[$i],@X[$i]
152
$code.=<<___ if ($i==15);
154
comiclr,<> $LAST10BITS,$a1,%r0
155
ldo 1($Tbl),$Tbl ; signal end of $Tbl
157
&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
163
.SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
168
$code.=<<___ if ($SZ==8);
169
.WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
170
.WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
171
.WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
172
.WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
173
.WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
174
.WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
175
.WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
176
.WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
177
.WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
178
.WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
179
.WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
180
.WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
181
.WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
182
.WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
183
.WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
184
.WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
185
.WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
186
.WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
187
.WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
188
.WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
189
.WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
190
.WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
191
.WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
192
.WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
193
.WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
194
.WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
195
.WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
196
.WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
197
.WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
198
.WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
199
.WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
200
.WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
201
.WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
202
.WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
203
.WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
204
.WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
205
.WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
206
.WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
207
.WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
208
.WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
210
$code.=<<___ if ($SZ==4);
211
.WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
212
.WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
213
.WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
214
.WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
215
.WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
216
.WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
217
.WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
218
.WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
219
.WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
220
.WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
221
.WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
222
.WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
223
.WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
224
.WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
225
.WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
226
.WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
230
.EXPORT $func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
234
.CALLINFO FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
236
$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
237
$PUSHMA %r3,$FRAME(%sp)
238
$PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
239
$PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
240
$PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
241
$PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
242
$PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
243
$PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
244
$PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
245
$PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
246
$PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp)
247
$PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp)
248
$PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp)
249
$PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp)
250
$PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp)
251
$PUSH %r17,`-$FRAME+14*$SIZE_T`(%sp)
252
$PUSH %r18,`-$FRAME+15*$SIZE_T`(%sp)
254
_shl $num,`log(16*$SZ)/log(2)`,$num
255
addl $inp,$num,$num ; $num to point at the end of $inp
257
$PUSH $num,`-$FRAME_MARKER-4*$SIZE_T`(%sp) ; save arguments
258
$PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
259
$PUSH $ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)
264
andcm $Tbl,$t1,$Tbl ; wipe privilege level
265
ldo L\$table-L\$pic($Tbl),$Tbl
267
$code.=<<___ if ($SZ==8 && $SIZE_T==4);
270
extrd,u,*= $t1,%sar,1,$t1 ; executes on PA-RISC 1.0
275
$LD `0*$SZ`($ctx),$A ; load context
284
extru $inp,31,`log($SZ)/log(2)`,$t0
287
mtctl $t0,%cr11 ; load %sar with align factor
292
andcm $inp,$t0,$t0 ; align $inp
294
for ($i=0;$i<15;$i++) { # load input block
295
$code.="\t$LD `$SZ*$i`($t0),@X[$i]\n"; }
297
cmpb,*= $inp,$t0,L\$aligned
298
$LD `$SZ*15`($t0),@X[15]
299
$LD `$SZ*16`($t0),@X[16]
301
for ($i=0;$i<16;$i++) { # align data
302
$code.="\t_align @X[$i],@X[$i+1],@X[$i]\n"; }
305
nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
308
for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
311
nop ; otherwise /usr/ccs/bin/as is confused by below .WORD
313
for(;$i<32;$i++) { &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
315
bb,>= $Tbl,31,L\$rounds ; end of $Tbl signalled?
318
$POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
319
$POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
320
$POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
321
ldo `-$rounds*$SZ-1`($Tbl),$Tbl ; rewind $Tbl
323
$LD `0*$SZ`($ctx),@X[0] ; load context
324
$LD `1*$SZ`($ctx),@X[1]
325
$LD `2*$SZ`($ctx),@X[2]
326
$LD `3*$SZ`($ctx),@X[3]
327
$LD `4*$SZ`($ctx),@X[4]
328
$LD `5*$SZ`($ctx),@X[5]
330
$LD `6*$SZ`($ctx),@X[6]
332
$LD `7*$SZ`($ctx),@X[7]
333
ldo `16*$SZ`($inp),$inp ; advance $inp
335
$ST $A,`0*$SZ`($ctx) ; save context
350
cmpb,*<>,n $inp,$num,L\$oop
351
$PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
353
if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
363
@V=( $Ahi, $Alo, $Bhi, $Blo, $Chi, $Clo, $Dhi, $Dlo,
364
$Ehi, $Elo, $Fhi, $Flo, $Ghi, $Glo, $Hhi, $Hlo) =
365
( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
366
"%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
377
@X=("%r23","%r24","%r25","%r26"); # zaps $num,$inp,$ctx
379
sub ROUND_00_15_pa1 {
380
my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
381
$ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
382
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
384
$code.=<<___ if (!$flag);
385
ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
386
ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
389
shd $ehi,$elo,$Sigma1[0],$t0
391
shd $elo,$ehi,$Sigma1[0],$t1
392
addc $Xhi,$hhi,$hhi ; h += X[i]
393
shd $ehi,$elo,$Sigma1[1],$t2
395
shd $elo,$ehi,$Sigma1[1],$t3
396
ldw -4($Tbl),$Xlo ; load K[i]
401
shd $ehi,$elo,$Sigma1[2],$t2
403
shd $elo,$ehi,$Sigma1[2],$t3
406
xor $t3,$t1,$t1 ; Sigma1(e)
409
addc $Xhi,$hhi,$hhi ; h += K[i]
410
xor $a3,$a1,$a1 ; Ch(e,f,g)
413
shd $ahi,$alo,$Sigma0[0],$t0
414
addc $t1,$hhi,$hhi ; h += Sigma1(e)
415
shd $alo,$ahi,$Sigma0[0],$t1
417
shd $ahi,$alo,$Sigma0[1],$t2
418
addc $a1,$hhi,$hhi ; h += Ch(e,f,g)
419
shd $alo,$ahi,$Sigma0[1],$t3
423
shd $ahi,$alo,$Sigma0[2],$t2
425
shd $alo,$ahi,$Sigma0[2],$t3
428
xor $t3,$t1,$t1 ; Sigma0(a)
435
addc $hhi,$dhi,$dhi ; d += h
439
addc $t1,$hhi,$hhi ; h += Sigma0(a)
442
xor $a3,$a1,$a1 ; Maj(a,b,c)
443
addc $a1,$hhi,$hhi ; h += Maj(a,b,c)
446
$code.=<<___ if ($i==15 && $flag);
447
extru $Xlo,31,10,$Xlo
448
comiclr,= $LAST10BITS,$Xlo,%r0
452
push(@X,shift(@X)); push(@X,shift(@X));
455
sub ROUND_16_xx_pa1 {
456
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
460
ldw `-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
461
ldw `-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo ; load X[i+1]
462
ldw `-$XOFF+8*(($i+9)%16)`(%sp),$a1
463
ldw `-$XOFF+8*(($i+9)%16)+4`(%sp),$a0 ; load X[i+9]
464
ldw `-$XOFF+8*(($i+14)%16)`(%sp),$a3
465
ldw `-$XOFF+8*(($i+14)%16)+4`(%sp),$a2 ; load X[i+14]
466
shd $Xnhi,$Xnlo,$sigma0[0],$t0
467
shd $Xnlo,$Xnhi,$sigma0[0],$t1
469
shd $Xnhi,$Xnlo,$sigma0[1],$t2
471
shd $Xnlo,$Xnhi,$sigma0[1],$t3
473
shd $Xnhi,$Xnlo,$sigma0[2],$t2
475
extru $Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
477
shd $a3,$a2,$sigma1[0],$a0
478
xor $t3,$t1,$t1 ; sigma0(X[i+1)&0x0f])
479
shd $a2,$a3,$sigma1[0],$a1
481
shd $a3,$a2,$sigma1[1],$t2
483
shd $a2,$a3,$sigma1[1],$t3
485
shd $a3,$a2,$sigma1[2],$t2
487
extru $a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
489
xor $t3,$a1,$a1 ; sigma0(X[i+14)&0x0f])
493
stw $Xhi,`-$XOFF+8*($i%16)`(%sp)
494
stw $Xlo,`-$XOFF+8*($i%16)+4`(%sp)
496
&ROUND_00_15_pa1($i,@_,1);
499
ldw `0*4`($ctx),$Ahi ; load context
509
ldw `10*4`($ctx),$Fhi
510
ldw `11*4`($ctx),$Flo
511
ldw `12*4`($ctx),$Ghi
512
ldw `13*4`($ctx),$Glo
513
ldw `14*4`($ctx),$Hhi
514
ldw `15*4`($ctx),$Hlo
519
mtctl $t0,%cr11 ; load %sar with align factor
523
comib,= 0,$a3,L\$aligned_pa1
526
ldw `0*4`($inp),$X[0]
527
ldw `1*4`($inp),$X[1]
534
vshd $X[0],$X[1],$X[0]
536
stw $X[0],`-$XOFF+0*4`(%sp)
539
stw $X[1],`-$XOFF+1*4`(%sp)
544
my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
545
for ($i=2;$i<=(128/4-8);$i++) {
547
stw $t[0],`-$XOFF+$i*4`(%sp)
548
ldw `(8+$i)*4`($inp),$t[0]
549
vshd $t[1],$t[2],$t[1]
553
for (;$i<(128/4-1);$i++) {
555
stw $t[0],`-$XOFF+$i*4`(%sp)
556
vshd $t[1],$t[2],$t[1]
562
stw $t[0],`-$XOFF+$i*4`(%sp)
568
ldw `0*4`($inp),$X[0]
569
ldw `1*4`($inp),$X[1]
576
stw $X[0],`-$XOFF+0*4`(%sp)
578
stw $X[1],`-$XOFF+1*4`(%sp)
582
my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
583
for ($i=2;$i<(128/4-8);$i++) {
585
stw $t[0],`-$XOFF+$i*4`(%sp)
586
ldw `(8+$i)*4`($inp),$t[0]
590
for (;$i<128/4;$i++) {
592
stw $t[0],`-$XOFF+$i*4`(%sp)
596
# Emit the L$collected_pa1 label, then unroll rounds 0..15. @V is rotated by
# two entries after every round because each working variable occupies a
# hi/lo pair of registers in this code path.
$code.="L\$collected_pa1\n";
599
for($i=0;$i<16;$i++) { &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
600
# Emit the L$rounds_pa1 label. $i carries over from the loop above, so this
# unrolls rounds 16..31 with the same two-entry rotation of @V.
$code.="L\$rounds_pa1\n";
601
for(;$i<32;$i++) { &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
604
$POP `-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx ; restore arguments
605
$POP `-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
606
$POP `-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
607
ldo `-$rounds*$SZ`($Tbl),$Tbl ; rewind $Tbl
609
ldw `0*4`($ctx),$t1 ; update context
648
ldo `16*$SZ`($inp),$inp ; advance $inp
652
stw $Fhi,`10*4`($ctx)
653
stw $Flo,`11*4`($ctx)
654
stw $Ghi,`12*4`($ctx)
655
stw $Glo,`13*4`($ctx)
656
stw $Hhi,`14*4`($ctx)
657
comb,= $inp,$num,L\$done
658
stw $Hlo,`15*4`($ctx)
660
$PUSH $inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) ; save $inp
665
$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
666
$POP `-$FRAME+1*$SIZE_T`(%sp),%r4
667
$POP `-$FRAME+2*$SIZE_T`(%sp),%r5
668
$POP `-$FRAME+3*$SIZE_T`(%sp),%r6
669
$POP `-$FRAME+4*$SIZE_T`(%sp),%r7
670
$POP `-$FRAME+5*$SIZE_T`(%sp),%r8
671
$POP `-$FRAME+6*$SIZE_T`(%sp),%r9
672
$POP `-$FRAME+7*$SIZE_T`(%sp),%r10
673
$POP `-$FRAME+8*$SIZE_T`(%sp),%r11
674
$POP `-$FRAME+9*$SIZE_T`(%sp),%r12
675
$POP `-$FRAME+10*$SIZE_T`(%sp),%r13
676
$POP `-$FRAME+11*$SIZE_T`(%sp),%r14
677
$POP `-$FRAME+12*$SIZE_T`(%sp),%r15
678
$POP `-$FRAME+13*$SIZE_T`(%sp),%r16
679
$POP `-$FRAME+14*$SIZE_T`(%sp),%r17
680
$POP `-$FRAME+15*$SIZE_T`(%sp),%r18
683
$POPMB -$FRAME(%sp),%r3
685
.STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
688
# Explicitly encode PA-RISC 2.0 instructions used in this module, so
689
# that it can be compiled with .LEVEL 1.0. It should be noted that I
690
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
694
my ($mod,$args) = @_;
695
my $orig = "ldd$mod\t$args";
697
if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices
698
{ my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1);
699
$opcode|=(1<<3) if ($mod =~ /^,m/);
700
$opcode|=(1<<2) if ($mod =~ /^,mb/);
701
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
707
my ($mod,$args) = @_;
708
my $orig = "std$mod\t$args";
710
if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
711
{ my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
712
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
718
my ($mod,$args) = @_;
719
my $orig = "extrd$mod\t$args";
721
# I only have ",u" completer, it's implicitly encoded...
722
if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
723
{ my $opcode=(0x36<<26)|($1<<21)|($4<<16);
725
$opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
726
$opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
727
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
729
elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
730
{ my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
732
$opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
733
$opcode |= (1<<13) if ($mod =~ /,\**=/);
734
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
740
my ($mod,$args) = @_;
741
my $orig = "shrpd$mod\t$args";
743
if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
744
{ my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
746
$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
747
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
749
elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
750
{ sprintf "\t.WORD\t0x%08x\t; %s",
751
(0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
757
my ($mnemonic,$mod,$args)=@_;
758
my $opcode = eval("\$$mnemonic");
760
ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
763
foreach (split("\n",$code)) {
764
s/\`([^\`]*)\`/eval $1/ge;
766
s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
767
$3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32) # rotation for >=32
768
: sprintf("shd\t%$1,%$2,%d",$3)/e or
769
# translate made up instructions: _ror, _shr, _align, _shl
770
s/_ror(\s+)(%r[0-9]+),/
771
($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e or
773
s/_shr(\s+%r[0-9]+),([0-9]+),/
774
$SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
775
: sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e or
777
s/_align(\s+%r[0-9]+,%r[0-9]+),/
778
($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e or
780
s/_shl(\s+%r[0-9]+),([0-9]+),/
781
$SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
782
: sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
784
s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4);
786
s/cmpb,\*/comb,/ if ($SIZE_T==4);