2
/* Test for a number of SSE instructions which were seen in the wild
   with a bogus (irrelevant) REX.W bit in their prefixes.  Some just
   have REX = 0x48 where REX.W is irrelevant, hence the whole REX
   prefix is pointless.  Probably related to #133962. */
9
#include <malloc.h> /* for memalign */
12
typedef unsigned char UChar;
15
struct { __attribute__((aligned(16))) UChar b[16]; }
19
struct { UWord128 reg[16]; }
23
struct { UWord128 dqw[5]; }
26
void pp_UWord128 ( UWord128* w ) {
29
for (i = 15; i >= 0; i--) {
31
sprintf(buf, "%02x", (unsigned int)w->b[i]);
33
if (buf[0] == '0') buf[0] = '.';
34
if (buf[1] == '0') buf[1] = '.';
39
void pp_XMMRegs ( char* who, XMMRegs* regs ) {
41
printf ("%s (xmms in order [15..0]) {\n", who );
42
for (i = 0; i < 16; i++) {
43
printf(" %%xmm%2d ", i);
44
pp_UWord128( ®s->reg[i] );
50
void pp_Mem ( char* who, Mem* mem ) {
52
printf ("%s (dqws in order [15 .. 0]) {\n", who );
53
for (i = 0; i < 5; i++) {
55
pp_UWord128( &mem->dqw[i] );
61
void xor_UWord128( UWord128* src, UWord128* dst ) {
63
for (i = 0; i < 16; i++)
64
dst->b[i] ^= src->b[i];
66
void xor_XMMRegs ( XMMRegs* src, XMMRegs* dst ) {
68
for (i = 0; i < 16; i++)
69
xor_UWord128( &src->reg[i], &dst->reg[i] );
72
void xor_Mem ( Mem* src, Mem* dst ) {
74
for (i = 0; i < 5; i++)
75
xor_UWord128( &src->dqw[i], &dst->dqw[i] );
78
void setup_regs_mem ( XMMRegs* regs, Mem* mem ) {
81
for (i = 0; i < 16; i++) {
82
for (j = 0; j < 16; j++)
83
regs->reg[i].b[j] = 0x51 + (ctr++ % 7);
85
for (i = 0; i < 5; i++) {
86
for (j = 0; j < 16; j++)
87
mem->dqw[i].b[j] = 0x52 + (ctr++ % 13);
91
void before_test ( XMMRegs* regs, Mem* mem ) {
92
setup_regs_mem( regs, mem );
95
void after_test ( char* who, XMMRegs* regs, Mem* mem ) {
99
setup_regs_mem( &rdiff, &mdiff );
100
xor_XMMRegs( regs, &rdiff );
101
xor_Mem( mem, &mdiff );
102
sprintf(s, "after \"%s\"", who );
104
pp_XMMRegs( s, &rdiff );
108
#define LOAD_XMMREGS_from_r14 \
109
"\tmovupd 0(%%r14), %%xmm0\n" \
110
"\tmovupd 16(%%r14), %%xmm1\n" \
111
"\tmovupd 32(%%r14), %%xmm2\n" \
112
"\tmovupd 48(%%r14), %%xmm3\n" \
113
"\tmovupd 64(%%r14), %%xmm4\n" \
114
"\tmovupd 80(%%r14), %%xmm5\n" \
115
"\tmovupd 96(%%r14), %%xmm6\n" \
116
"\tmovupd 112(%%r14), %%xmm7\n" \
117
"\tmovupd 128(%%r14), %%xmm8\n" \
118
"\tmovupd 144(%%r14), %%xmm9\n" \
119
"\tmovupd 160(%%r14), %%xmm10\n" \
120
"\tmovupd 176(%%r14), %%xmm11\n" \
121
"\tmovupd 192(%%r14), %%xmm12\n" \
122
"\tmovupd 208(%%r14), %%xmm13\n" \
123
"\tmovupd 224(%%r14), %%xmm14\n" \
124
"\tmovupd 240(%%r14), %%xmm15\n"
126
#define SAVE_XMMREGS_to_r14 \
127
"\tmovupd %%xmm0, 0(%%r14)\n" \
128
"\tmovupd %%xmm1, 16(%%r14)\n" \
129
"\tmovupd %%xmm2, 32(%%r14)\n" \
130
"\tmovupd %%xmm3, 48(%%r14)\n" \
131
"\tmovupd %%xmm4, 64(%%r14)\n" \
132
"\tmovupd %%xmm5, 80(%%r14)\n" \
133
"\tmovupd %%xmm6, 96(%%r14)\n" \
134
"\tmovupd %%xmm7, 112(%%r14)\n" \
135
"\tmovupd %%xmm8, 128(%%r14)\n" \
136
"\tmovupd %%xmm9, 144(%%r14)\n" \
137
"\tmovupd %%xmm10, 160(%%r14)\n" \
138
"\tmovupd %%xmm11, 176(%%r14)\n" \
139
"\tmovupd %%xmm12, 192(%%r14)\n" \
140
"\tmovupd %%xmm13, 208(%%r14)\n" \
141
"\tmovupd %%xmm14, 224(%%r14)\n" \
142
"\tmovupd %%xmm15, 240(%%r14)"
145
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \
146
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
149
/* Boilerplate for test */
151
before_test( regs, mem );
152
__asm__ __volatile__(
155
LOAD_XMMREGS_from_r14
156
"\tmovq %%r15, %%rx\n"
159
: /*out*/ : /*in*/ "r"(regs), "r"( -x + (char*)&mem->dqw[2] )
160
: /*trash*/ "r14","r15","memory", XMMREGS,
163
after_test( "", regs, mem );
171
regs = memalign(16, sizeof(XMMRegs)); assert(regs);
172
mem = memalign(16, sizeof(Mem)); assert(mem);
174
/* Both have to be 16-aligned so we can do movapd et al */
175
assert( 0 == (0xFL & (unsigned long int)regs) );
176
assert( 0 == (0xFL & (unsigned long int)mem) );
178
/* addpd mem, reg 66 49 0f 58 48 00 rex.WB addpd 0x0(%r8),%xmm1 */
180
before_test( regs, mem );
181
__asm__ __volatile__(
184
LOAD_XMMREGS_from_r14
185
"\tmovq %%r15, %%r8\n"
186
"\t.byte 0x66,0x49,0x0f,0x58,0x48,0x00\n"
188
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
189
: /*trash*/ "r14","r15","memory", XMMREGS,
192
after_test( "rex.WB addpd 0x0(%r8),%xmm1", regs, mem );
195
/* addsd mem, reg f2 48 0f 58 27 rex.W addsd (%rdi),%xmm4 */
197
before_test( regs, mem );
198
__asm__ __volatile__(
201
LOAD_XMMREGS_from_r14
202
"\tmovq %%r15, %%rdi\n"
203
"\t.byte 0xf2,0x48,0x0f,0x58,0x27\n"
205
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
206
: /*trash*/ "r14","r15","memory", XMMREGS,
209
after_test( "rex.W addsd (%rdi),%xmm4", regs, mem );
212
/* movapd mem, reg 66 48 0f 28 0a rex.W movapd (%rdx),%xmm1 */
214
before_test( regs, mem );
215
__asm__ __volatile__(
218
LOAD_XMMREGS_from_r14
219
"\tmovq %%r15, %%rdx\n"
220
"\t.byte 0x66,0x48,0x0f,0x28,0x0a\n"
222
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
223
: /*trash*/ "r14","r15","memory", XMMREGS,
226
after_test( "rex.W movapd (%rdx),%xmm1", regs, mem );
229
/* movapd reg, mem 66 48 0f 29 0a rex.W movapd %xmm1,(%rdx) */
231
before_test( regs, mem );
232
__asm__ __volatile__(
235
LOAD_XMMREGS_from_r14
236
"\tmovq %%r15, %%rdx\n"
237
"\t.byte 0x66,0x48,0x0f,0x29,0x0a\n"
239
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
240
: /*trash*/ "r14","r15","memory", XMMREGS,
243
after_test( "rex.W movapd %xmm1,(%rdx)", regs, mem );
246
/* movaps mem, reg 48 0f 28 42 30 rex.W movaps 0x30(%rdx),%xmm0 */
248
before_test( regs, mem );
249
__asm__ __volatile__(
252
LOAD_XMMREGS_from_r14
253
"\tmovq %%r15, %%rdx\n"
254
"\t.byte 0x48,0x0f,0x28,0x42,0x30\n"
256
: /*out*/ : /*in*/ "r"(regs), "r"( -0x30 + (char*)&mem->dqw[2] )
257
: /*trash*/ "r14","r15","memory", XMMREGS,
260
after_test( "movaps 0x30(%rdx),%xmm0", regs, mem );
263
/* movaps reg, mem 49 0f 29 48 00 rex.WB movaps %xmm1,0x0(%r8) */
265
before_test( regs, mem );
266
__asm__ __volatile__(
269
LOAD_XMMREGS_from_r14
270
"\tmovq %%r15, %%r8\n"
271
"\t.byte 0x49,0x0f,0x29,0x48,0x00\n"
273
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
274
: /*trash*/ "r14","r15","memory", XMMREGS,
277
after_test( "rex.WB movaps %xmm1,0x0(%r8)", regs, mem );
280
/* movddup mem, reg f2 48 0f 12 2a rex.W movddup (%rdx),%xmm5 */
282
before_test( regs, mem );
283
__asm__ __volatile__(
286
LOAD_XMMREGS_from_r14
287
"\tmovq %%r15, %%rdx\n"
288
"\t.byte 0xf2,0x48,0x0f,0x12,0x2a\n"
290
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
291
: /*trash*/ "r14","r15","memory", XMMREGS,
294
after_test( "movddup (%rdx),%xmm5", regs, mem );
297
/* movhpd mem, reg 66 48 0f 16 06 rex.W movhpd (%rsi),%xmm0 */
299
before_test( regs, mem );
300
__asm__ __volatile__(
303
LOAD_XMMREGS_from_r14
304
"\tmovq %%r15, %%rsi\n"
305
"\t.byte 0x66,0x48,0x0f,0x16,0x06\n"
307
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
308
: /*trash*/ "r14","r15","memory", XMMREGS,
311
after_test( "rex.W movhpd (%rsi),%xmm0", regs, mem );
314
/* movhpd reg, mem 66 48 0f 17 07 rex.W movhpd %xmm0,(%rdi) */
316
before_test( regs, mem );
317
__asm__ __volatile__(
320
LOAD_XMMREGS_from_r14
321
"\tmovq %%r15, %%rdi\n"
322
"\t.byte 0x66,0x48,0x0f,0x17,0x07\n"
324
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
325
: /*trash*/ "r14","r15","memory", XMMREGS,
328
after_test( "rex.W movhpd %xmm0,(%rdi)", regs, mem );
331
/* movhps mem, reg 48 0f 16 36 rex.W movhps (%rsi),%xmm6 */
333
before_test( regs, mem );
334
__asm__ __volatile__(
337
LOAD_XMMREGS_from_r14
338
"\tmovq %%r15, %%rsi\n"
339
"\t.byte 0x48,0x0f,0x16,0x36\n"
341
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
342
: /*trash*/ "r14","r15","memory", XMMREGS,
345
after_test( "rex.W movhps (%rsi),%xmm6", regs, mem );
347
/* movhps reg, mem 49 0f 17 03 rex.WB movhps %xmm0,(%r11) */
349
before_test( regs, mem );
350
__asm__ __volatile__(
353
LOAD_XMMREGS_from_r14
354
"\tmovq %%r15, %%r11\n"
355
"\t.byte 0x49,0x0F,0x17,0x03\n" /* rex.WB movhps %xmm0,(%r11) */
357
: /*out*/ : /*in*/ "r"(regs), "r"( 0 + (char*)&mem->dqw[2] )
358
: /*trash*/ "r14","r15","memory", XMMREGS,
361
after_test( "rex.WB movhps %xmm0,(%r11)", regs, mem );
364
/* movlpd mem, reg 66 48 0f 12 4a 00 rex.W movlpd 0x0(%rdx),%xmm1 */
366
before_test( regs, mem );
367
__asm__ __volatile__(
370
LOAD_XMMREGS_from_r14
371
"\tmovq %%r15, %%rdx\n"
372
"\t.byte 0x66,0x48,0x0f,0x12,0x4a,0x00\n"
374
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
375
: /*trash*/ "r14","r15","memory", XMMREGS,
378
after_test( "rex.W movlpd 0x0(%rdx),%xmm1", regs, mem );
381
/* movlpd reg, mem 66 48 0f 13 30 rex.W movlpd %xmm6,(%rax) */
383
before_test( regs, mem );
384
__asm__ __volatile__(
387
LOAD_XMMREGS_from_r14
388
"\tmovq %%r15, %%rax\n"
389
"\t.byte 0x66,0x48,0x0f,0x13,0x30\n"
391
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
392
: /*trash*/ "r14","r15","memory", XMMREGS,
395
after_test( "rex.W movlpd %xmm6,(%rax)", regs, mem );
398
/* movlps mem, reg 48 0f 12 07 rex.W movlps (%rdi),%xmm0 */
400
before_test( regs, mem );
401
__asm__ __volatile__(
404
LOAD_XMMREGS_from_r14
405
"\tmovq %%r15, %%rdi\n"
406
"\t.byte 0x48,0x0f,0x12,0x07\n"
408
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
409
: /*trash*/ "r14","r15","memory", XMMREGS,
412
after_test( "rex.W movlps (%rdi),%xmm0", regs, mem );
415
/* movlps reg, mem 49 0f 13 02 rex.WB movlps %xmm0,(%r10) */
417
before_test( regs, mem );
418
__asm__ __volatile__(
421
LOAD_XMMREGS_from_r14
422
"\tmovq %%r15, %%r10\n"
423
"\t.byte 0x49,0x0f,0x13,0x02\n"
425
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
426
: /*trash*/ "r14","r15","memory", XMMREGS,
429
after_test( "rex.WB movlps %xmm0,(%r10)", regs, mem );
432
/* movq mem, reg f3 48 0f 7e 00 rex.W movq (%rax),%xmm0 */
434
before_test( regs, mem );
435
__asm__ __volatile__(
438
LOAD_XMMREGS_from_r14
439
"\tmovq %%r15, %%rax\n"
440
"\t.byte 0xf3,0x48,0x0f,0x7e,0x00\n"
442
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
443
: /*trash*/ "r14","r15","memory", XMMREGS,
446
after_test( "rex.W movq (%rax),%xmm0", regs, mem );
449
/* movq reg, mem 66 48 0f d6 00 rex.W movq %xmm0,(%rax) */
451
before_test( regs, mem );
452
__asm__ __volatile__(
455
LOAD_XMMREGS_from_r14
456
"\tmovq %%r15, %%rax\n"
457
"\t.byte 0x66,0x48,0x0f,0xd6,0x00\n"
459
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
460
: /*trash*/ "r14","r15","memory", XMMREGS,
463
after_test( "rex.W movq %xmm0,(%rax)", regs, mem );
466
/* movsd mem, reg f2 48 0f 10 11 rex.W movsd (%rcx),%xmm2 */
468
before_test( regs, mem );
469
__asm__ __volatile__(
472
LOAD_XMMREGS_from_r14
473
"\tmovq %%r15, %%rcx\n"
474
"\t.byte 0xf2,0x48,0x0f,0x10,0x11\n"
476
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
477
: /*trash*/ "r14","r15","memory", XMMREGS,
480
after_test( "rex.W movsd (%rcx),%xmm2", regs, mem );
483
/* movsd reg, mem f2 48 0f 11 3f rex.W movsd %xmm7,(%rdi) */
485
before_test( regs, mem );
486
__asm__ __volatile__(
489
LOAD_XMMREGS_from_r14
490
"\tmovq %%r15, %%rdi\n"
491
"\t.byte 0xf2,0x48,0x0f,0x11,0x3f\n"
493
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
494
: /*trash*/ "r14","r15","memory", XMMREGS,
497
after_test( "rex.W movsd %xmm7,(%rdi)", regs, mem );
500
/* movss mem, reg f3 48 0f 10 5e 04 rex.W movss 0x4(%rsi),%xmm3 */
502
before_test( regs, mem );
503
__asm__ __volatile__(
506
LOAD_XMMREGS_from_r14
507
"\tmovq %%r15, %%rsi\n"
508
"\t.byte 0xf3,0x48,0x0f,0x10,0x5e,0x04\n"
510
: /*out*/ : /*in*/ "r"(regs), "r"( -0x4 + (char*)&mem->dqw[2] )
511
: /*trash*/ "r14","r15","memory", XMMREGS,
514
after_test( "rex.W movss 0x4(%rsi),%xmm3", regs, mem );
517
/* movupd reg, mem 66 48 0f 11 07 rex.W movupd %xmm0,(%rdi) */
519
before_test( regs, mem );
520
__asm__ __volatile__(
523
LOAD_XMMREGS_from_r14
524
"\tmovq %%r15, %%rdi\n"
525
"\t.byte 0x66,0x48,0x0f,0x11,0x07\n"
527
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
528
: /*trash*/ "r14","r15","memory", XMMREGS,
531
after_test( "rex.W movupd %xmm0,(%rdi)", regs, mem );
534
/* mulpd mem, reg 66 48 0f 59 61 00 rex.W mulpd 0x0(%rcx),%xmm4 */
536
before_test( regs, mem );
537
__asm__ __volatile__(
540
LOAD_XMMREGS_from_r14
541
"\tmovq %%r15, %%rcx\n"
542
"\t.byte 0x66,0x48,0x0f,0x59,0x61,0x00\n"
544
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
545
: /*trash*/ "r14","r15","memory", XMMREGS,
548
after_test( "rex.W mulpd 0x0(%rcx),%xmm4", regs, mem );
551
/* mulsd mem, reg f2 48 0f 59 1f rex.W mulsd (%rdi),%xmm3 */
553
before_test( regs, mem );
554
__asm__ __volatile__(
557
LOAD_XMMREGS_from_r14
558
"\tmovq %%r15, %%rdi\n"
559
"\t.byte 0xf2,0x48,0x0f,0x59,0x1f\n"
561
: /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
562
: /*trash*/ "r14","r15","memory", XMMREGS,
565
after_test( "rex.W mulsd (%rdi),%xmm3", regs, mem );
568
/* prefetcht0 49 0f 18 4c f2 a0 rex.WB prefetcht0 -0x60(%r10,%rsi,8) */
570
before_test( regs, mem );
571
__asm__ __volatile__(
574
LOAD_XMMREGS_from_r14
575
"\tmovq %%r15, %%r10\n"
576
"\txorq %%rsi, %%rsi\n"
577
"\t.byte 0x49,0x0f,0x18,0x4c,0xf2,0xa0\n"
579
: /*out*/ : /*in*/ "r"(regs), "r"( - -0x60 + (char*)&mem->dqw[2] )
580
: /*trash*/ "r14","r15","memory", XMMREGS,
583
after_test( "rex.WB prefetcht0 -0x60(%r10,%rsi,8)", regs, mem );
586
/* subsd mem, reg f2 49 0f 5c 4d f8 rex.WB subsd -0x8(%r13),%xmm1 */
588
before_test( regs, mem );
589
__asm__ __volatile__(
592
LOAD_XMMREGS_from_r14
593
"\tmovq %%r15, %%r13\n"
594
"\t.byte 0xf2,0x49,0x0f,0x5c,0x4d,0xf8\n"
596
: /*out*/ : /*in*/ "r"(regs), "r"( - -0x8 + (char*)&mem->dqw[2] )
597
: /*trash*/ "r14","r15","memory", XMMREGS,
600
after_test( "rex.WB subsd -0x8(%r13),%xmm1", regs, mem );