2
/*--------------------------------------------------------------------*/
3
/*--- The core dispatch loop, for jumping to a code address. ---*/
4
/*--- dispatch-ppc64.S ---*/
5
/*--------------------------------------------------------------------*/
8
This file is part of Valgrind, a dynamic binary instrumentation
11
Copyright (C) 2005 Cerion Armour-Brown <cerion@open-works.co.uk>
13
This program is free software; you can redistribute it and/or
14
modify it under the terms of the GNU General Public License as
15
published by the Free Software Foundation; either version 2 of the
16
License, or (at your option) any later version.
18
This program is distributed in the hope that it will be useful, but
19
WITHOUT ANY WARRANTY; without even the implied warranty of
20
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
General Public License for more details.
23
You should have received a copy of the GNU General Public License
24
along with this program; if not, write to the Free Software
25
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28
The GNU General Public License is contained in the file COPYING.
31
#include "pub_core_basics_asm.h"
32
#include "pub_core_dispatch_asm.h"
33
#include "pub_core_transtab_asm.h"
34
#include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */
37
/* References to globals via the TOC */
40
/* VG_(tt_fast) is only *referenced* from this file, through the TOC entry
   below, in the same way as the other three globals.  The local definition
   that follows is deliberately disabled: it reserved just 4 bytes, whereas
   the dispatchers index the symbol as a table with an 8-byte stride, and
   exporting it with .globl would collide with the real definition
   (presumably provided by the translation-table code -- confirm).

.globl vgPlain_tt_fast
41
.lcomm vgPlain_tt_fast,4,4
42
.type vgPlain_tt_fast, @object
*/
45
/* TOC entry holding the address of VG_(tt_fast) */
.tocent__vgPlain_tt_fast:
46
.tc vgPlain_tt_fast[TC],vgPlain_tt_fast
47
/* TOC entry holding the address of VG_(tt_fastN) */
.tocent__vgPlain_tt_fastN:
48
.tc vgPlain_tt_fastN[TC],vgPlain_tt_fastN
49
/* TOC entry holding the address of VG_(dispatch_ctr) */
.tocent__vgPlain_dispatch_ctr:
50
.tc vgPlain_dispatch_ctr[TC],vgPlain_dispatch_ctr
51
/* TOC entry holding the address of VG_(machine_ppc64_has_VMX) */
.tocent__vgPlain_machine_ppc64_has_VMX:
52
.tc vgPlain_machine_ppc64_has_VMX[TC],vgPlain_machine_ppc64_has_VMX
54
/*------------------------------------------------------------*/
56
/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
57
/*--- run all translations except no-redir ones. ---*/
59
/*------------------------------------------------------------*/
61
/*----------------------------------------------------*/
62
/*--- Preamble (set everything up) ---*/
63
/*----------------------------------------------------*/
66
/* UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );

   Entry to the dispatch loop.  On entry r3 = guest_state and
   r4 = do_profiling.  The preamble allocates a 624-byte stack frame, saves
   callee-saved state in it, puts the host FPU and AltiVec control words
   into the mode expected by VEX-generated code, fetches the guest CIA into
   r3 and finally branches to the unprofiled or the profiled dispatcher.
   The value handed back to the caller is left in r3 by the exit code. */
71
.globl VG_(run_innerloop)
75
/* descriptor triple: address of the code entry point, TOC base, 0 */
.quad .VG_(run_innerloop),.TOC.@tocbase,0
77
.type .VG_(run_innerloop),@function
78
.globl .VG_(run_innerloop)
80
/* r3 holds guest_state */
81
/* r4 holds do_profiling */
83
/* ----- entry point to VG_(run_innerloop) ----- */
84
/* PPC64 ABI saves LR at 16(parent sp) and CR at 8(parent sp) */
93
stdu 1,-624(1) /* sp should maintain 16-byte alignment */
95
/* Save callee-saved registers... */
97
/* Floating-point reg save area : 144 bytes */
117
/* General reg save area : 144 bytes */
136
/* Probably not necessary to save r13 (thread-specific ptr),
137
as VEX stays clear of it... but what the hey. */
140
/* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
141
The Linux kernel might not actually use VRSAVE for its intended
142
purpose, but it should be harmless to preserve anyway. */
143
/* r3, r4 are live here, so use r5 */
144
ld 5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) /* r5 = &VG_(machine_ppc64_has_VMX), via the TOC (r2) */
149
/* VRSAVE save word : 32 bytes */
/* NOTE(review): "32 bytes" for a single save word looks like it should
   read "32 bits"; the alignment padding listed below is 4 bytes -- confirm. */
150
mfspr 5,256 /* vrsave reg is spr number 256 */
153
/* Alignment padding : 4 bytes */
155
/* Vector reg save area (quadword aligned) : 192 bytes */
182
/* Local variable space... */
184
/* r3 holds guest_state */
185
/* r4 holds do_profiling */
187
std 3,104(1) /* spill orig guest_state ptr */
189
/* 96(sp) used later to check FPSCR[RM] */
190
/* 88(sp) used later to load fpscr with zero */
193
/* Linkage Area (reserved)
195
32(sp) : link editor doubleword
196
24(sp) : compiler doubleword */
202
// CAB TODO: Use a caller-saved reg for orig guest_state ptr
203
// - rem to set non-allocateable in isel.c
205
/* hold dispatch_ctr (=32bit value) in r29 */
206
ld 29,.tocent__vgPlain_dispatch_ctr@toc(2) /* r29 = &VG_(dispatch_ctr), via the TOC */
209
/* set host FPU control word to the default mode expected
210
by VEX-generated code. See comments in libvex.h for details. */
212
/* => get zero into f3 (tedious)
213
fsub 3,3,3 is not a reliable way to do this, since if
214
f3 holds a NaN or similar then we don't necessarily
215
wind up with zero. */
219
mtfsf 0xFF,3 /* fpscr = lo32 of f3 */
221
/* set host AltiVec control word to the default mode expected
222
by VEX-generated code. */
223
ld 5,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) /* r5 = &VG_(machine_ppc64_has_VMX), via the TOC */
228
vspltisw 3,0x0 /* generate zero */
232
/* make a stack frame for the code we are calling */
235
/* fetch %CIA into r3 */
236
ld 3,OFFSET_ppc64_CIA(31)
238
/* fall into main loop (the right one) */
239
/* r4 = do_profiling. It's probably trashed after here,
240
but that's OK: we don't need it after here. */
242
/* NOTE(review): the compare feeding this beq is not visible here;
   presumably it tests do_profiling against zero -- confirm. */
beq .VG_(run_innerloop__dispatch_unprofiled)
243
b .VG_(run_innerloop__dispatch_profiled)
247
/*----------------------------------------------------*/
248
/*--- NO-PROFILING (standard) dispatcher ---*/
249
/*----------------------------------------------------*/
253
/* Unprofiled dispatcher: the loop that stores the next guest address into
   the guest state, looks it up in VG_(tt_fast) and runs the translation
   found there, then branches back to itself.  A failed lookup leaves the
   loop through .fast_lookup_failed. */
.globl VG_(run_innerloop__dispatch_unprofiled)
256
VG_(run_innerloop__dispatch_unprofiled):
257
/* descriptor triple: address of the code entry point, TOC base, 0 */
.quad .VG_(run_innerloop__dispatch_unprofiled),.TOC.@tocbase,0
259
.type .VG_(run_innerloop__dispatch_unprofiled),@function
260
.globl .VG_(run_innerloop__dispatch_unprofiled)
261
.VG_(run_innerloop__dispatch_unprofiled):
262
/* At entry: Live regs:
265
r3 (=CIA = next guest address)
269
152(r1) (=orig guest_state)
270
144(r1) (=var space for FPSCR[RM]) */
/* NOTE(review): these two offsets are 48 larger than the 104(sp)/96(sp)
   used in the preamble -- presumably because of the extra frame made
   before entering the dispatcher; confirm. */
273
/* Has the guest state ptr been messed with? If yes, exit. */
274
ld 5,152(1) /* original guest_state ptr */
278
/* save the jump address in the guest state */
279
std 3,OFFSET_ppc64_CIA(31)
281
/* Are we out of timeslice? If yes, defer to scheduler. */
286
/* try a fast lookup in the translation cache */
287
/* r4 = VG_TT_FAST_HASH(addr) * sizeof(ULong*)
288
= ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
289
rldicl 4,3, 62, 64-VG_TT_FAST_BITS
292
ld 5, .tocent__vgPlain_tt_fast@toc(2) /* r5 = &VG_(tt_fast), via the TOC */
293
ldx 5, 5,4 /* r5 = VG_(tt_fast)[VG_TT_FAST_HASH(addr)] */
294
ld 6, 0(5) /* r6 = (r5)->orig_addr */
296
bne .fast_lookup_failed
298
/* Found a match. Call tce[1], which is 8 bytes along, since
299
each tce element is a 64-bit int. */
303
/* run the translation */
306
/* On return from guest code:
307
r3 holds destination (original) address.
308
r31 may be unchanged (guest_state), or may indicate further
309
details of the control transfer requested to *r3. */
313
/* go round the loop again with the new r3 */
b .VG_(run_innerloop__dispatch_unprofiled)
315
.size VG_(run_innerloop), .-VG_(run_innerloop)
318
/*----------------------------------------------------*/
319
/*--- PROFILING dispatcher (can be much slower) ---*/
320
/*----------------------------------------------------*/
324
/* Profiled dispatcher: same loop as the unprofiled one, but on a
   successful fast lookup it also bumps the 32-bit counter reached through
   VG_(tt_fastN) before running the translation.  A failed lookup leaves
   the loop through .fast_lookup_failed. */
.globl VG_(run_innerloop__dispatch_profiled)
327
VG_(run_innerloop__dispatch_profiled):
328
/* descriptor triple: address of the code entry point, TOC base, 0 */
.quad .VG_(run_innerloop__dispatch_profiled),.TOC.@tocbase,0
330
.type .VG_(run_innerloop__dispatch_profiled),@function
331
.globl .VG_(run_innerloop__dispatch_profiled)
332
.VG_(run_innerloop__dispatch_profiled):
333
/* At entry: Live regs:
336
r3 (=CIA = next guest address)
340
152(r1) (=orig guest_state)
341
144(r1) (=var space for FPSCR[RM]) */
/* NOTE(review): these two offsets are 48 larger than the 104(sp)/96(sp)
   used in the preamble -- presumably because of the extra frame made
   before entering the dispatcher; confirm. */
344
/* Has the guest state ptr been messed with? If yes, exit. */
345
ld 5,152(1) /* original guest_state ptr */
349
/* save the jump address in the guest state */
350
std 3,OFFSET_ppc64_CIA(31)
352
/* Are we out of timeslice? If yes, defer to scheduler. */
357
/* try a fast lookup in the translation cache */
358
/* r4 = VG_TT_FAST_HASH(addr) * sizeof(ULong*)
359
= ((r3 >>u 2) & VG_TT_FAST_MASK) << 3 */
360
rldicl 4,3, 62, 64-VG_TT_FAST_BITS
363
ld 5, .tocent__vgPlain_tt_fast@toc(2) /* r5 = &VG_(tt_fast), via the TOC */
364
ldx 5, 5,4 /* r5 = VG_(tt_fast)[VG_TT_FAST_HASH(addr)] */
365
ld 6, 0(5) /* r6 = (r5)->orig_addr */
367
bne .fast_lookup_failed
369
/* increment bb profile counter VG_(tt_fastN)[x] (=32bit val) */
370
ld 7, .tocent__vgPlain_tt_fastN@toc(2) /* r7 = &VG_(tt_fastN), via the TOC */
371
ldx 7, 7,4 /* r7 = VG_(tt_fastN)[VG_TT_HASH(addr)] */
372
lwz 6, 0(7) /* *(UInt*)r7 ++ */
376
/* Found a match. Call tce[1], which is 8 bytes along, since
377
each tce element is a 64-bit int. */
381
/* run the translation */
384
/* On return from guest code:
385
r3 holds destination (original) address.
386
r31 may be unchanged (guest_state), or may indicate further
387
details of the control transfer requested to *r3. */
391
/* go round the loop again with the new r3 */
b .VG_(run_innerloop__dispatch_profiled)
393
/* Size the function symbol of this dispatcher.  This directive used to
   name VG_(run_a_noredir_translation), which is not defined until much
   further down the file. */
.size .VG_(run_innerloop__dispatch_profiled), .-.VG_(run_innerloop__dispatch_profiled)
396
/*----------------------------------------------------*/
397
/*--- exit points ---*/
398
/*----------------------------------------------------*/
401
/* Exit paths of the dispatch loop.  Each path leaves the value to be
   returned in r3 and then branches to the common exit code, which restores
   the saved state and pops the 624-byte frame. */
/* Someone messed with the gsp (in r31). Have to
402
defer to scheduler to resolve this. dispatch ctr
403
is not yet decremented, so no need to increment. */
404
/* %CIA is NOT up to date here. First, need to write
405
%r3 back to %CIA, but without trashing %r31 since
406
that holds the value we want to return to the scheduler.
407
Hence use %r5 transiently for the guest state pointer. */
408
ld 5,152(1) /* original guest_state ptr */
409
std 3,OFFSET_ppc64_CIA(5)
410
mr 3,31 /* r3 = new gsp value */
411
b .run_innerloop_exit
415
/* %CIA is up to date */
416
/* back out decrement of the dispatch counter */
418
li 3,VG_TRC_INNER_COUNTERZERO /* r3 = return code for this exit */
419
b .run_innerloop_exit
422
/* %CIA is up to date */
423
/* back out decrement of the dispatch counter */
425
li 3,VG_TRC_INNER_FASTMISS /* r3 = return code for this exit */
426
b .run_innerloop_exit
430
/* All exits from the dispatcher go through here.
431
r3 holds the return value.
434
/* We're leaving. Check that nobody messed with
437
/* Set fpscr back to a known state, since vex-generated code
438
may have messed with fpscr[rm]. */
444
mtfsf 0xFF,3 /* fpscr = f3 */
446
/* Using r11 - value used again further on, so don't trash! */
447
ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2) /* r11 = &VG_(machine_ppc64_has_VMX), via the TOC */
452
/* Check VSCR[NJ] == 1 */
453
/* first generate 4x 0x00010000 */
454
vspltisw 4,0x1 /* 4x 0x00000001 */
455
vspltisw 5,0x0 /* zero */
456
vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */
457
/* retrieve VSCR and mask wanted bits */
459
vand 7,7,6 /* gives NJ flag */
460
vspltw 7,7,0x3 /* flags-word to all lanes */
461
vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
462
bt 24,.invariant_violation /* branch if all_equal */
465
/* otherwise we're OK */
466
b .run_innerloop_exit_REALLY
469
/* An invariant check failed: replace the return value in r3. */
.invariant_violation:
470
li 3,VG_TRC_INVARIANT_FAILED
471
b .run_innerloop_exit_REALLY
473
.run_innerloop_exit_REALLY:
474
/* r3 holds VG_TRC_* value to return */
476
/* Return to parent stack */
479
/* Write ctr to VG_(dispatch_ctr) (=32bit value) */
480
ld 5,.tocent__vgPlain_dispatch_ctr@toc(2) /* r5 = &VG_(dispatch_ctr), via the TOC */
487
/* Restore callee-saved registers... */
489
/* Floating-point regs */
530
/* r11 already holds VG_(machine_ppc64_has_VMX) value */
536
mfspr 4,256 /* VRSAVE reg is spr number 256 */
565
/* reset cr, lr, sp */
566
ld 0,632(1) /* stack_size + 8 */
568
ld 0,640(1) /* stack_size + 16 */
570
addi 1,1,624 /* stack_size */
574
/*------------------------------------------------------------*/
576
/*--- A special dispatcher, for running no-redir ---*/
577
/*--- translations. Just runs the given translation once. ---*/
579
/*------------------------------------------------------------*/
582
/* void VG_(run_a_noredir_translation) ( UWord* argblock ); */
585
/* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args
586
and 2 to carry results:
587
0: input: ptr to translation
588
1: input: ptr to guest state
589
2: output: next guest PC
590
3: output: guest state pointer afterwards (== thread return code) */
594
.globl VG_(run_a_noredir_translation)
597
VG_(run_a_noredir_translation):
598
/* descriptor triple: address of the code entry point, TOC base, 0 */
.quad .VG_(run_a_noredir_translation),.TOC.@tocbase,0
600
.type .VG_(run_a_noredir_translation),@function
601
.globl .VG_(run_a_noredir_translation)
602
.VG_(run_a_noredir_translation):
603
/* save callee-save int regs, & lr */
625
std 2,408(1) /* also preserve R2, just in case .. */
657
ld 2,408(1) /* restore the R2 value saved at 408(sp) above */
663
/* Let the linker know we don't need an executable stack: this emits an
   empty, non-executable .note.GNU-stack section for this object. */
664
.section .note.GNU-stack,"",@progbits
666
/*--------------------------------------------------------------------*/
668
/*--------------------------------------------------------------------*/